Bug Summary

File:build/gcc/tree-vect-loop.cc
Warning:line 6305, column 7
Value stored to 'epilog_stmt' is never read

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-suse-linux -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name tree-vect-loop.cc -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model static -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -fcoverage-compilation-dir=/buildworker/marxinbox-gcc-clang-static-analyzer/objdir/gcc -resource-dir /usr/lib64/clang/15.0.7 -D IN_GCC -D HAVE_CONFIG_H -I . -I . -I /buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc -I /buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/. -I /buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/../include -I /buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/../libcpp/include -I /buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/../libcody -I /buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/../libdecnumber -I /buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/../libdecnumber/bid -I ../libdecnumber -I /buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/../libbacktrace -internal-isystem /usr/bin/../lib64/gcc/x86_64-suse-linux/13/../../../../include/c++/13 -internal-isystem /usr/bin/../lib64/gcc/x86_64-suse-linux/13/../../../../include/c++/13/x86_64-suse-linux -internal-isystem /usr/bin/../lib64/gcc/x86_64-suse-linux/13/../../../../include/c++/13/backward -internal-isystem /usr/lib64/clang/15.0.7/include -internal-isystem /usr/local/include -internal-isystem /usr/bin/../lib64/gcc/x86_64-suse-linux/13/../../../../x86_64-suse-linux/include -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-narrowing -Wwrite-strings -Wno-long-long -Wno-variadic-macros -Wno-overlength-strings -fdeprecated-macro -fdebug-compilation-dir=/buildworker/marxinbox-gcc-clang-static-analyzer/objdir/gcc -ferror-limit 19 -fno-rtti -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -analyzer-output=plist-html -analyzer-config silence-checkers=core.NullDereference -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /buildworker/marxinbox-gcc-clang-static-analyzer/objdir/clang-static-analyzer/2023-03-27-141847-20772-1/report-XQOczW.plist -x c++ /buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc
1/* Loop Vectorization
2 Copyright (C) 2003-2023 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com> and
4 Ira Rosen <irar@il.ibm.com>
5
6This file is part of GCC.
7
8GCC is free software; you can redistribute it and/or modify it under
9the terms of the GNU General Public License as published by the Free
10Software Foundation; either version 3, or (at your option) any later
11version.
12
13GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14WARRANTY; without even the implied warranty of MERCHANTABILITY or
15FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16for more details.
17
18You should have received a copy of the GNU General Public License
19along with GCC; see the file COPYING3. If not see
20<http://www.gnu.org/licenses/>. */
21
22#define INCLUDE_ALGORITHM
23#include "config.h"
24#include "system.h"
25#include "coretypes.h"
26#include "backend.h"
27#include "target.h"
28#include "rtl.h"
29#include "tree.h"
30#include "gimple.h"
31#include "cfghooks.h"
32#include "tree-pass.h"
33#include "ssa.h"
34#include "optabs-tree.h"
35#include "diagnostic-core.h"
36#include "fold-const.h"
37#include "stor-layout.h"
38#include "cfganal.h"
39#include "gimplify.h"
40#include "gimple-iterator.h"
41#include "gimplify-me.h"
42#include "tree-ssa-loop-ivopts.h"
43#include "tree-ssa-loop-manip.h"
44#include "tree-ssa-loop-niter.h"
45#include "tree-ssa-loop.h"
46#include "cfgloop.h"
47#include "tree-scalar-evolution.h"
48#include "tree-vectorizer.h"
49#include "gimple-fold.h"
50#include "cgraph.h"
51#include "tree-cfg.h"
52#include "tree-if-conv.h"
53#include "internal-fn.h"
54#include "tree-vector-builder.h"
55#include "vec-perm-indices.h"
56#include "tree-eh.h"
57#include "case-cfn-macros.h"
58
59/* Loop Vectorization Pass.
60
61 This pass tries to vectorize loops.
62
63 For example, the vectorizer transforms the following simple loop:
64
65 short a[N]; short b[N]; short c[N]; int i;
66
67 for (i=0; i<N; i++){
68 a[i] = b[i] + c[i];
69 }
70
71 as if it was manually vectorized by rewriting the source code into:
72
73 typedef int __attribute__((mode(V8HI))) v8hi;
74 short a[N]; short b[N]; short c[N]; int i;
75 v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c;
76 v8hi va, vb, vc;
77
78 for (i=0; i<N/8; i++){
79 vb = pb[i];
80 vc = pc[i];
81 va = vb + vc;
82 pa[i] = va;
83 }
84
85 The main entry to this pass is vectorize_loops(), in which
86 the vectorizer applies a set of analyses on a given set of loops,
87 followed by the actual vectorization transformation for the loops that
88 had successfully passed the analysis phase.
89 Throughout this pass we make a distinction between two types of
90 data: scalars (which are represented by SSA_NAMES), and memory references
91 ("data-refs"). These two types of data require different handling both
92 during analysis and transformation. The types of data-refs that the
93 vectorizer currently supports are ARRAY_REFS which base is an array DECL
94 (not a pointer), and INDIRECT_REFS through pointers; both array and pointer
95 accesses are required to have a simple (consecutive) access pattern.
96
97 Analysis phase:
98 ===============
99 The driver for the analysis phase is vect_analyze_loop().
100 It applies a set of analyses, some of which rely on the scalar evolution
101 analyzer (scev) developed by Sebastian Pop.
102
103 During the analysis phase the vectorizer records some information
104 per stmt in a "stmt_vec_info" struct which is attached to each stmt in the
105 loop, as well as general information about the loop as a whole, which is
106 recorded in a "loop_vec_info" struct attached to each loop.
107
108 Transformation phase:
109 =====================
110 The loop transformation phase scans all the stmts in the loop, and
111 creates a vector stmt (or a sequence of stmts) for each scalar stmt S in
112 the loop that needs to be vectorized. It inserts the vector code sequence
113 just before the scalar stmt S, and records a pointer to the vector code
114 in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct
115 attached to S). This pointer will be used for the vectorization of following
116 stmts which use the def of stmt S. Stmt S is removed if it writes to memory;
117 otherwise, we rely on dead code elimination for removing it.
118
119 For example, say stmt S1 was vectorized into stmt VS1:
120
121 VS1: vb = px[i];
122 S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
123 S2: a = b;
124
125 To vectorize stmt S2, the vectorizer first finds the stmt that defines
126 the operand 'b' (S1), and gets the relevant vector def 'vb' from the
127 vector stmt VS1 pointed to by STMT_VINFO_VEC_STMT (stmt_info (S1)). The
128 resulting sequence would be:
129
130 VS1: vb = px[i];
131 S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
132 VS2: va = vb;
133 S2: a = b; STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2
134
135 Operands that are not SSA_NAMEs, are data-refs that appear in
136 load/store operations (like 'x[i]' in S1), and are handled differently.
137
138 Target modeling:
139 =================
140 Currently the only target specific information that is used is the
141 size of the vector (in bytes) - "TARGET_VECTORIZE_UNITS_PER_SIMD_WORD".
142 Targets that can support different sizes of vectors, for now will need
143 to specify one value for "TARGET_VECTORIZE_UNITS_PER_SIMD_WORD". More
144 flexibility will be added in the future.
145
146 Since we only vectorize operations which vector form can be
147 expressed using existing tree codes, to verify that an operation is
148 supported, the vectorizer checks the relevant optab at the relevant
149 machine_mode (e.g, optab_handler (add_optab, V8HImode)). If
150 the value found is CODE_FOR_nothing, then there's no target support, and
151 we can't vectorize the stmt.
152
153 For additional information on this project see:
154 http://gcc.gnu.org/projects/tree-ssa/vectorization.html
155*/
156
157static void vect_estimate_min_profitable_iters (loop_vec_info, int *, int *,
158 unsigned *);
159static stmt_vec_info vect_is_simple_reduction (loop_vec_info, stmt_vec_info,
160 bool *, bool *, bool);
161
162/* Subroutine of vect_determine_vf_for_stmt that handles only one
163 statement. VECTYPE_MAYBE_SET_P is true if STMT_VINFO_VECTYPE
164 may already be set for general statements (not just data refs). */
165
166static opt_result
167vect_determine_vf_for_stmt_1 (vec_info *vinfo, stmt_vec_info stmt_info,
168 bool vectype_maybe_set_p,
169 poly_uint64 *vf)
170{
171 gimple *stmt = stmt_info->stmt;
172
173 if ((!STMT_VINFO_RELEVANT_P (stmt_info)((stmt_info)->relevant != vect_unused_in_scope)
174 && !STMT_VINFO_LIVE_P (stmt_info)(stmt_info)->live)
175 || gimple_clobber_p (stmt))
176 {
177 if (dump_enabled_p ())
178 dump_printf_loc (MSG_NOTE, vect_location, "skip.\n");
179 return opt_result::success ();
180 }
181
182 tree stmt_vectype, nunits_vectype;
183 opt_result res = vect_get_vector_types_for_stmt (vinfo, stmt_info,
184 &stmt_vectype,
185 &nunits_vectype);
186 if (!res)
187 return res;
188
189 if (stmt_vectype)
190 {
191 if (STMT_VINFO_VECTYPE (stmt_info)(stmt_info)->vectype)
192 /* The only case when a vectype had been already set is for stmts
193 that contain a data ref, or for "pattern-stmts" (stmts generated
194 by the vectorizer to represent/replace a certain idiom). */
195 gcc_assert ((STMT_VINFO_DATA_REF (stmt_info)((void)(!((((stmt_info)->dr_aux.dr + 0) || vectype_maybe_set_p
) && (stmt_info)->vectype == stmt_vectype) ? fancy_abort
("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 197, __FUNCTION__), 0 : 0))
196 || vectype_maybe_set_p)((void)(!((((stmt_info)->dr_aux.dr + 0) || vectype_maybe_set_p
) && (stmt_info)->vectype == stmt_vectype) ? fancy_abort
("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 197, __FUNCTION__), 0 : 0))
197 && STMT_VINFO_VECTYPE (stmt_info) == stmt_vectype)((void)(!((((stmt_info)->dr_aux.dr + 0) || vectype_maybe_set_p
) && (stmt_info)->vectype == stmt_vectype) ? fancy_abort
("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 197, __FUNCTION__), 0 : 0))
;
198 else
199 STMT_VINFO_VECTYPE (stmt_info)(stmt_info)->vectype = stmt_vectype;
200 }
201
202 if (nunits_vectype)
203 vect_update_max_nunits (vf, nunits_vectype);
204
205 return opt_result::success ();
206}
207
208/* Subroutine of vect_determine_vectorization_factor. Set the vector
209 types of STMT_INFO and all attached pattern statements and update
210 the vectorization factor VF accordingly. Return true on success
211 or false if something prevented vectorization. */
212
213static opt_result
214vect_determine_vf_for_stmt (vec_info *vinfo,
215 stmt_vec_info stmt_info, poly_uint64 *vf)
216{
217 if (dump_enabled_p ())
218 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: %G",
219 stmt_info->stmt);
220 opt_result res = vect_determine_vf_for_stmt_1 (vinfo, stmt_info, false, vf);
221 if (!res)
222 return res;
223
224 if (STMT_VINFO_IN_PATTERN_P (stmt_info)(stmt_info)->in_pattern_p
225 && STMT_VINFO_RELATED_STMT (stmt_info)(stmt_info)->related_stmt)
226 {
227 gimple *pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)(stmt_info)->pattern_def_seq;
228 stmt_info = STMT_VINFO_RELATED_STMT (stmt_info)(stmt_info)->related_stmt;
229
230 /* If a pattern statement has def stmts, analyze them too. */
231 for (gimple_stmt_iterator si = gsi_start (pattern_def_seq);
232 !gsi_end_p (si); gsi_next (&si))
233 {
234 stmt_vec_info def_stmt_info = vinfo->lookup_stmt (gsi_stmt (si));
235 if (dump_enabled_p ())
236 dump_printf_loc (MSG_NOTE, vect_location,
237 "==> examining pattern def stmt: %G",
238 def_stmt_info->stmt);
239 res = vect_determine_vf_for_stmt_1 (vinfo, def_stmt_info, true, vf);
240 if (!res)
241 return res;
242 }
243
244 if (dump_enabled_p ())
245 dump_printf_loc (MSG_NOTE, vect_location,
246 "==> examining pattern statement: %G",
247 stmt_info->stmt);
248 res = vect_determine_vf_for_stmt_1 (vinfo, stmt_info, true, vf);
249 if (!res)
250 return res;
251 }
252
253 return opt_result::success ();
254}
255
256/* Function vect_determine_vectorization_factor
257
258 Determine the vectorization factor (VF). VF is the number of data elements
259 that are operated upon in parallel in a single iteration of the vectorized
260 loop. For example, when vectorizing a loop that operates on 4byte elements,
261 on a target with vector size (VS) 16byte, the VF is set to 4, since 4
262 elements can fit in a single vector register.
263
264 We currently support vectorization of loops in which all types operated upon
265 are of the same size. Therefore this function currently sets VF according to
266 the size of the types operated upon, and fails if there are multiple sizes
267 in the loop.
268
269 VF is also the factor by which the loop iterations are strip-mined, e.g.:
270 original loop:
271 for (i=0; i<N; i++){
272 a[i] = b[i] + c[i];
273 }
274
275 vectorized loop:
276 for (i=0; i<N; i+=VF){
277 a[i:VF] = b[i:VF] + c[i:VF];
278 }
279*/
280
281static opt_result
282vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
283{
284 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo)(loop_vinfo)->loop;
285 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo)(loop_vinfo)->bbs;
286 unsigned nbbs = loop->num_nodes;
287 poly_uint64 vectorization_factor = 1;
288 tree scalar_type = NULL_TREE(tree) nullptr;
289 gphi *phi;
290 tree vectype;
291 stmt_vec_info stmt_info;
292 unsigned i;
293
294 DUMP_VECT_SCOPE ("vect_determine_vectorization_factor")auto_dump_scope scope ("vect_determine_vectorization_factor",
vect_location)
;
295
296 for (i = 0; i < nbbs; i++)
297 {
298 basic_block bb = bbs[i];
299
300 for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si);
301 gsi_next (&si))
302 {
303 phi = si.phi ();
304 stmt_info = loop_vinfo->lookup_stmt (phi);
305 if (dump_enabled_p ())
306 dump_printf_loc (MSG_NOTE, vect_location, "==> examining phi: %G",
307 (gimple *) phi);
308
309 gcc_assert (stmt_info)((void)(!(stmt_info) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 309, __FUNCTION__), 0 : 0))
;
310
311 if (STMT_VINFO_RELEVANT_P (stmt_info)((stmt_info)->relevant != vect_unused_in_scope)
312 || STMT_VINFO_LIVE_P (stmt_info)(stmt_info)->live)
313 {
314 gcc_assert (!STMT_VINFO_VECTYPE (stmt_info))((void)(!(!(stmt_info)->vectype) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 314, __FUNCTION__), 0 : 0))
;
315 scalar_type = TREE_TYPE (PHI_RESULT (phi))((contains_struct_check ((get_def_from_ptr (gimple_phi_result_ptr
(phi))), (TS_TYPED), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 315, __FUNCTION__))->typed.type)
;
316
317 if (dump_enabled_p ())
318 dump_printf_loc (MSG_NOTE, vect_location,
319 "get vectype for scalar type: %T\n",
320 scalar_type);
321
322 vectype = get_vectype_for_scalar_type (loop_vinfo, scalar_type);
323 if (!vectype)
324 return opt_result::failure_at (phi,
325 "not vectorized: unsupported "
326 "data-type %T\n",
327 scalar_type);
328 STMT_VINFO_VECTYPE (stmt_info)(stmt_info)->vectype = vectype;
329
330 if (dump_enabled_p ())
331 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n",
332 vectype);
333
334 if (dump_enabled_p ())
335 {
336 dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
337 dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (vectype));
338 dump_printf (MSG_NOTE, "\n");
339 }
340
341 vect_update_max_nunits (&vectorization_factor, vectype);
342 }
343 }
344
345 for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si);
346 gsi_next (&si))
347 {
348 if (is_gimple_debug (gsi_stmt (si)))
349 continue;
350 stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
351 opt_result res
352 = vect_determine_vf_for_stmt (loop_vinfo,
353 stmt_info, &vectorization_factor);
354 if (!res)
355 return res;
356 }
357 }
358
359 /* TODO: Analyze cost. Decide if worth while to vectorize. */
360 if (dump_enabled_p ())
361 {
362 dump_printf_loc (MSG_NOTE, vect_location, "vectorization factor = ");
363 dump_dec (MSG_NOTE, vectorization_factor);
364 dump_printf (MSG_NOTE, "\n");
365 }
366
367 if (known_le (vectorization_factor, 1U)(!maybe_lt (1U, vectorization_factor)))
368 return opt_result::failure_at (vect_location,
369 "not vectorized: unsupported data-type\n");
370 LOOP_VINFO_VECT_FACTOR (loop_vinfo)(loop_vinfo)->vectorization_factor = vectorization_factor;
371 return opt_result::success ();
372}
373
374
375/* Function vect_is_simple_iv_evolution.
376
377 FORNOW: A simple evolution of an induction variables in the loop is
378 considered a polynomial evolution. */
379
380static bool
381vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
382 tree * step)
383{
384 tree init_expr;
385 tree step_expr;
386 tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb);
387 basic_block bb;
388
389 /* When there is no evolution in this loop, the evolution function
390 is not "simple". */
391 if (evolution_part == NULL_TREE(tree) nullptr)
392 return false;
393
394 /* When the evolution is a polynomial of degree >= 2
395 the evolution function is not "simple". */
396 if (tree_is_chrec (evolution_part))
397 return false;
398
399 step_expr = evolution_part;
400 init_expr = unshare_expr (initial_condition_in_loop_num (access_fn, loop_nb));
401
402 if (dump_enabled_p ())
403 dump_printf_loc (MSG_NOTE, vect_location, "step: %T, init: %T\n",
404 step_expr, init_expr);
405
406 *init = init_expr;
407 *step = step_expr;
408
409 if (TREE_CODE (step_expr)((enum tree_code) (step_expr)->base.code) != INTEGER_CST
410 && (TREE_CODE (step_expr)((enum tree_code) (step_expr)->base.code) != SSA_NAME
411 || ((bb = gimple_bb (SSA_NAME_DEF_STMT (step_expr)(tree_check ((step_expr), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 411, __FUNCTION__, (SSA_NAME)))->ssa_name.def_stmt
))
412 && flow_bb_inside_loop_p (get_loop (cfun(cfun + 0), loop_nb), bb))
413 || (!INTEGRAL_TYPE_P (TREE_TYPE (step_expr))(((enum tree_code) (((contains_struct_check ((step_expr), (TS_TYPED
), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 413, __FUNCTION__))->typed.type))->base.code) == ENUMERAL_TYPE
|| ((enum tree_code) (((contains_struct_check ((step_expr), (
TS_TYPED), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 413, __FUNCTION__))->typed.type))->base.code) == BOOLEAN_TYPE
|| ((enum tree_code) (((contains_struct_check ((step_expr), (
TS_TYPED), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 413, __FUNCTION__))->typed.type))->base.code) == INTEGER_TYPE
)
414 && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr))(((enum tree_code) (((contains_struct_check ((step_expr), (TS_TYPED
), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 414, __FUNCTION__))->typed.type))->base.code) == REAL_TYPE
)
415 || !flag_associative_mathglobal_options.x_flag_associative_math)))
416 && (TREE_CODE (step_expr)((enum tree_code) (step_expr)->base.code) != REAL_CST
417 || !flag_associative_mathglobal_options.x_flag_associative_math))
418 {
419 if (dump_enabled_p ())
420 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
421 "step unknown.\n");
422 return false;
423 }
424
425 return true;
426}
427
428/* Function vect_is_nonlinear_iv_evolution
429
430 Only support nonlinear induction for integer type
431 1. neg
432 2. mul by constant
433 3. lshift/rshift by constant.
434
435 For neg induction, return a fake step as integer -1. */
436static bool
437vect_is_nonlinear_iv_evolution (class loop* loop, stmt_vec_info stmt_info,
438 gphi* loop_phi_node, tree *init, tree *step)
439{
440 tree init_expr, ev_expr, result, op1, op2;
441 gimple* def;
442
443 if (gimple_phi_num_args (loop_phi_node) != 2)
444 return false;
445
446 init_expr = PHI_ARG_DEF_FROM_EDGE (loop_phi_node, loop_preheader_edge (loop))gimple_phi_arg_def (((loop_phi_node)), ((loop_preheader_edge (
loop))->dest_idx))
;
447 ev_expr = PHI_ARG_DEF_FROM_EDGE (loop_phi_node, loop_latch_edge (loop))gimple_phi_arg_def (((loop_phi_node)), ((loop_latch_edge (loop
))->dest_idx))
;
448
449 /* Support nonlinear induction only for integer type. */
450 if (!INTEGRAL_TYPE_P (TREE_TYPE (init_expr))(((enum tree_code) (((contains_struct_check ((init_expr), (TS_TYPED
), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 450, __FUNCTION__))->typed.type))->base.code) == ENUMERAL_TYPE
|| ((enum tree_code) (((contains_struct_check ((init_expr), (
TS_TYPED), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 450, __FUNCTION__))->typed.type))->base.code) == BOOLEAN_TYPE
|| ((enum tree_code) (((contains_struct_check ((init_expr), (
TS_TYPED), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 450, __FUNCTION__))->typed.type))->base.code) == INTEGER_TYPE
)
)
451 return false;
452
453 *init = init_expr;
454 result = PHI_RESULT (loop_phi_node)get_def_from_ptr (gimple_phi_result_ptr (loop_phi_node));
455
456 if (TREE_CODE (ev_expr)((enum tree_code) (ev_expr)->base.code) != SSA_NAME
457 || ((def = SSA_NAME_DEF_STMT (ev_expr)(tree_check ((ev_expr), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 457, __FUNCTION__, (SSA_NAME)))->ssa_name.def_stmt
), false)
458 || !is_gimple_assign (def))
459 return false;
460
461 enum tree_code t_code = gimple_assign_rhs_code (def);
462 switch (t_code)
463 {
464 case NEGATE_EXPR:
465 if (gimple_assign_rhs1 (def) != result)
466 return false;
467 *step = build_int_cst (TREE_TYPE (init_expr)((contains_struct_check ((init_expr), (TS_TYPED), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 467, __FUNCTION__))->typed.type)
, -1);
468 STMT_VINFO_LOOP_PHI_EVOLUTION_TYPE (stmt_info)(stmt_info)->loop_phi_evolution_type = vect_step_op_neg;
469 break;
470
471 case RSHIFT_EXPR:
472 case LSHIFT_EXPR:
473 case MULT_EXPR:
474 op1 = gimple_assign_rhs1 (def);
475 op2 = gimple_assign_rhs2 (def);
476 if (TREE_CODE (op2)((enum tree_code) (op2)->base.code) != INTEGER_CST
477 || op1 != result)
478 return false;
479 *step = op2;
480 if (t_code == LSHIFT_EXPR)
481 STMT_VINFO_LOOP_PHI_EVOLUTION_TYPE (stmt_info)(stmt_info)->loop_phi_evolution_type = vect_step_op_shl;
482 else if (t_code == RSHIFT_EXPR)
483 STMT_VINFO_LOOP_PHI_EVOLUTION_TYPE (stmt_info)(stmt_info)->loop_phi_evolution_type = vect_step_op_shr;
484 /* NEGATE_EXPR and MULT_EXPR are both vect_step_op_mul. */
485 else
486 STMT_VINFO_LOOP_PHI_EVOLUTION_TYPE (stmt_info)(stmt_info)->loop_phi_evolution_type = vect_step_op_mul;
487 break;
488
489 default:
490 return false;
491 }
492
493 STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED (stmt_info)(stmt_info)->loop_phi_evolution_base_unchanged = *init;
494 STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_info)(stmt_info)->loop_phi_evolution_part = *step;
495
496 return true;
497}
498
499/* Return true if PHI, described by STMT_INFO, is the inner PHI in
500 what we are assuming is a double reduction. For example, given
501 a structure like this:
502
503 outer1:
504 x_1 = PHI <x_4(outer2), ...>;
505 ...
506
507 inner:
508 x_2 = PHI <x_1(outer1), ...>;
509 ...
510 x_3 = ...;
511 ...
512
513 outer2:
514 x_4 = PHI <x_3(inner)>;
515 ...
516
517 outer loop analysis would treat x_1 as a double reduction phi and
518 this function would then return true for x_2. */
519
520static bool
521vect_inner_phi_in_double_reduction_p (loop_vec_info loop_vinfo, gphi *phi)
522{
523 use_operand_p use_p;
524 ssa_op_iter op_iter;
525 FOR_EACH_PHI_ARG (use_p, phi, op_iter, SSA_OP_USE)for ((use_p) = op_iter_init_phiuse (&(op_iter), phi, 0x01
); !op_iter_done (&(op_iter)); (use_p) = op_iter_next_use
(&(op_iter)))
526 if (stmt_vec_info def_info = loop_vinfo->lookup_def (USE_FROM_PTR (use_p)get_use_from_ptr (use_p)))
527 if (STMT_VINFO_DEF_TYPE (def_info)(def_info)->def_type == vect_double_reduction_def)
528 return true;
529 return false;
530}
531
532/* Returns true if Phi is a first-order recurrence. A first-order
533 recurrence is a non-reduction recurrence relation in which the value of
534 the recurrence in the current loop iteration equals a value defined in
535 the previous iteration. */
536
537static bool
538vect_phi_first_order_recurrence_p (loop_vec_info loop_vinfo, class loop *loop,
539 gphi *phi)
540{
541 /* A nested cycle isn't vectorizable as first order recurrence. */
542 if (LOOP_VINFO_LOOP (loop_vinfo)(loop_vinfo)->loop != loop)
543 return false;
544
545 /* Ensure the loop latch definition is from within the loop. */
546 edge latch = loop_latch_edge (loop);
547 tree ldef = PHI_ARG_DEF_FROM_EDGE (phi, latch)gimple_phi_arg_def (((phi)), ((latch)->dest_idx));
548 if (TREE_CODE (ldef)((enum tree_code) (ldef)->base.code) != SSA_NAME
549 || SSA_NAME_IS_DEFAULT_DEF (ldef)(tree_check ((ldef), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 549, __FUNCTION__, (SSA_NAME)))->base.default_def_flag
550 || is_a <gphi *> (SSA_NAME_DEF_STMT (ldef)(tree_check ((ldef), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 550, __FUNCTION__, (SSA_NAME)))->ssa_name.def_stmt
)
551 || !flow_bb_inside_loop_p (loop, gimple_bb (SSA_NAME_DEF_STMT (ldef)(tree_check ((ldef), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 551, __FUNCTION__, (SSA_NAME)))->ssa_name.def_stmt
)))
552 return false;
553
554 tree def = gimple_phi_result (phi);
555
556 /* Ensure every use_stmt of the phi node is dominated by the latch
557 definition. */
558 imm_use_iterator imm_iter;
559 use_operand_p use_p;
560 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, def)for ((use_p) = first_readonly_imm_use (&(imm_iter), (def)
); !end_readonly_imm_use_p (&(imm_iter)); (void) ((use_p)
= next_readonly_imm_use (&(imm_iter))))
561 if (!is_gimple_debug (USE_STMT (use_p)(use_p)->loc.stmt)
562 && (SSA_NAME_DEF_STMT (ldef)(tree_check ((ldef), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 562, __FUNCTION__, (SSA_NAME)))->ssa_name.def_stmt
== USE_STMT (use_p)(use_p)->loc.stmt
563 || !vect_stmt_dominates_stmt_p (SSA_NAME_DEF_STMT (ldef)(tree_check ((ldef), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 563, __FUNCTION__, (SSA_NAME)))->ssa_name.def_stmt
,
564 USE_STMT (use_p)(use_p)->loc.stmt)))
565 return false;
566
567 /* First-order recurrence autovectorization needs shuffle vector. */
568 tree scalar_type = TREE_TYPE (def)((contains_struct_check ((def), (TS_TYPED), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 568, __FUNCTION__))->typed.type)
;
569 tree vectype = get_vectype_for_scalar_type (loop_vinfo, scalar_type);
570 if (!vectype)
571 return false;
572
573 return true;
574}
575
576/* Function vect_analyze_scalar_cycles_1.
577
578 Examine the cross iteration def-use cycles of scalar variables
579 in LOOP. LOOP_VINFO represents the loop that is now being
580 considered for vectorization (can be LOOP, or an outer-loop
581 enclosing LOOP). SLP indicates there will be some subsequent
582 slp analyses or not. */
583
584static void
585vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, class loop *loop,
586 bool slp)
587{
588 basic_block bb = loop->header;
589 tree init, step;
590 auto_vec<stmt_vec_info, 64> worklist;
591 gphi_iterator gsi;
592 bool double_reduc, reduc_chain;
593
594 DUMP_VECT_SCOPE ("vect_analyze_scalar_cycles")auto_dump_scope scope ("vect_analyze_scalar_cycles", vect_location
)
;
595
596 /* First - identify all inductions. Reduction detection assumes that all the
597 inductions have been identified, therefore, this order must not be
598 changed. */
599 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
600 {
601 gphi *phi = gsi.phi ();
602 tree access_fn = NULLnullptr;
603 tree def = PHI_RESULT (phi)get_def_from_ptr (gimple_phi_result_ptr (phi));
604 stmt_vec_info stmt_vinfo = loop_vinfo->lookup_stmt (phi);
605
606 if (dump_enabled_p ())
607 dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: %G",
608 (gimple *) phi);
609
610 /* Skip virtual phi's. The data dependences that are associated with
611 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
612 if (virtual_operand_p (def))
613 continue;
614
615 STMT_VINFO_DEF_TYPE (stmt_vinfo)(stmt_vinfo)->def_type = vect_unknown_def_type;
616
617 /* Analyze the evolution function. */
618 access_fn = analyze_scalar_evolution (loop, def);
619 if (access_fn)
620 {
621 STRIP_NOPS (access_fn)(access_fn) = tree_strip_nop_conversions ((const_cast<union
tree_node *> (((access_fn)))))
;
622 if (dump_enabled_p ())
623 dump_printf_loc (MSG_NOTE, vect_location,
624 "Access function of PHI: %T\n", access_fn);
625 STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED (stmt_vinfo)(stmt_vinfo)->loop_phi_evolution_base_unchanged
626 = initial_condition_in_loop_num (access_fn, loop->num);
627 STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_vinfo)(stmt_vinfo)->loop_phi_evolution_part
628 = evolution_part_in_loop_num (access_fn, loop->num);
629 }
630
631 if ((!access_fn
632 || vect_inner_phi_in_double_reduction_p (loop_vinfo, phi)
633 || !vect_is_simple_iv_evolution (loop->num, access_fn,
634 &init, &step)
635 || (LOOP_VINFO_LOOP (loop_vinfo)(loop_vinfo)->loop != loop
636 && TREE_CODE (step)((enum tree_code) (step)->base.code) != INTEGER_CST))
637 /* Only handle nonlinear iv for same loop. */
638 && (LOOP_VINFO_LOOP (loop_vinfo)(loop_vinfo)->loop != loop
639 || !vect_is_nonlinear_iv_evolution (loop, stmt_vinfo,
640 phi, &init, &step)))
641 {
642 worklist.safe_push (stmt_vinfo);
643 continue;
644 }
645
646 gcc_assert (STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED (stmt_vinfo)((void)(!((stmt_vinfo)->loop_phi_evolution_base_unchanged !=
(tree) nullptr) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 647, __FUNCTION__), 0 : 0))
647 != NULL_TREE)((void)(!((stmt_vinfo)->loop_phi_evolution_base_unchanged !=
(tree) nullptr) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 647, __FUNCTION__), 0 : 0))
;
648 gcc_assert (STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_vinfo) != NULL_TREE)((void)(!((stmt_vinfo)->loop_phi_evolution_part != (tree) nullptr
) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 648, __FUNCTION__), 0 : 0))
;
649
650 if (dump_enabled_p ())
651 dump_printf_loc (MSG_NOTE, vect_location, "Detected induction.\n");
652 STMT_VINFO_DEF_TYPE (stmt_vinfo)(stmt_vinfo)->def_type = vect_induction_def;
653 }
654
655
656 /* Second - identify all reductions and nested cycles. */
657 while (worklist.length () > 0)
658 {
659 stmt_vec_info stmt_vinfo = worklist.pop ();
660 gphi *phi = as_a <gphi *> (stmt_vinfo->stmt);
661 tree def = PHI_RESULT (phi)get_def_from_ptr (gimple_phi_result_ptr (phi));
662
663 if (dump_enabled_p ())
664 dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: %G",
665 (gimple *) phi);
666
667 gcc_assert (!virtual_operand_p (def)((void)(!(!virtual_operand_p (def) && (stmt_vinfo)->
def_type == vect_unknown_def_type) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 668, __FUNCTION__), 0 : 0))
668 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_unknown_def_type)((void)(!(!virtual_operand_p (def) && (stmt_vinfo)->
def_type == vect_unknown_def_type) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 668, __FUNCTION__), 0 : 0))
;
669
670 stmt_vec_info reduc_stmt_info
671 = vect_is_simple_reduction (loop_vinfo, stmt_vinfo, &double_reduc,
672 &reduc_chain, slp);
673 if (reduc_stmt_info)
674 {
675 STMT_VINFO_REDUC_DEF (stmt_vinfo)(stmt_vinfo)->reduc_def = reduc_stmt_info;
676 STMT_VINFO_REDUC_DEF (reduc_stmt_info)(reduc_stmt_info)->reduc_def = stmt_vinfo;
677 if (double_reduc)
678 {
679 if (dump_enabled_p ())
680 dump_printf_loc (MSG_NOTE, vect_location,
681 "Detected double reduction.\n");
682
683 STMT_VINFO_DEF_TYPE (stmt_vinfo)(stmt_vinfo)->def_type = vect_double_reduction_def;
684 STMT_VINFO_DEF_TYPE (reduc_stmt_info)(reduc_stmt_info)->def_type = vect_double_reduction_def;
685 }
686 else
687 {
688 if (loop != LOOP_VINFO_LOOP (loop_vinfo)(loop_vinfo)->loop)
689 {
690 if (dump_enabled_p ())
691 dump_printf_loc (MSG_NOTE, vect_location,
692 "Detected vectorizable nested cycle.\n");
693
694 STMT_VINFO_DEF_TYPE (stmt_vinfo)(stmt_vinfo)->def_type = vect_nested_cycle;
695 }
696 else
697 {
698 if (dump_enabled_p ())
699 dump_printf_loc (MSG_NOTE, vect_location,
700 "Detected reduction.\n");
701
702 STMT_VINFO_DEF_TYPE (stmt_vinfo)(stmt_vinfo)->def_type = vect_reduction_def;
703 STMT_VINFO_DEF_TYPE (reduc_stmt_info)(reduc_stmt_info)->def_type = vect_reduction_def;
704 /* Store the reduction cycles for possible vectorization in
705 loop-aware SLP if it was not detected as reduction
706 chain. */
707 if (! reduc_chain)
708 LOOP_VINFO_REDUCTIONS (loop_vinfo)(loop_vinfo)->reductions.safe_push
709 (reduc_stmt_info);
710 }
711 }
712 }
713 else if (vect_phi_first_order_recurrence_p (loop_vinfo, loop, phi))
714 STMT_VINFO_DEF_TYPE (stmt_vinfo)(stmt_vinfo)->def_type = vect_first_order_recurrence;
715 else
716 if (dump_enabled_p ())
717 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
718 "Unknown def-use cycle pattern.\n");
719 }
720}
721
722
723/* Function vect_analyze_scalar_cycles.
724
725 Examine the cross iteration def-use cycles of scalar variables, by
726 analyzing the loop-header PHIs of scalar variables. Classify each
727 cycle as one of the following: invariant, induction, reduction, unknown.
728 We do that for the loop represented by LOOP_VINFO, and also to its
729 inner-loop, if exists.
730 Examples for scalar cycles:
731
732 Example1: reduction:
733
734 loop1:
735 for (i=0; i<N; i++)
736 sum += a[i];
737
738 Example2: induction:
739
740 loop2:
741 for (i=0; i<N; i++)
742 a[i] = i; */
743
744static void
745vect_analyze_scalar_cycles (loop_vec_info loop_vinfo, bool slp)
746{
747 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo)(loop_vinfo)->loop;
748
749 vect_analyze_scalar_cycles_1 (loop_vinfo, loop, slp);
750
751 /* When vectorizing an outer-loop, the inner-loop is executed sequentially.
752 Reductions in such inner-loop therefore have different properties than
753 the reductions in the nest that gets vectorized:
754 1. When vectorized, they are executed in the same order as in the original
755 scalar loop, so we can't change the order of computation when
756 vectorizing them.
757 2. FIXME: Inner-loop reductions can be used in the inner-loop, so the
758 current checks are too strict. */
759
760 if (loop->inner)
761 vect_analyze_scalar_cycles_1 (loop_vinfo, loop->inner, slp);
762}
763
764/* Transfer group and reduction information from STMT_INFO to its
765 pattern stmt. */
766
767static void
768vect_fixup_reduc_chain (stmt_vec_info stmt_info)
769{
770 stmt_vec_info firstp = STMT_VINFO_RELATED_STMT (stmt_info)(stmt_info)->related_stmt;
771 stmt_vec_info stmtp;
772 gcc_assert (!REDUC_GROUP_FIRST_ELEMENT (firstp)((void)(!(!(((void)(!(!(firstp)->dr_aux.dr) ? fancy_abort (
"/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 772, __FUNCTION__), 0 : 0)), (firstp)->first_element) &&
(((void)(!(!(stmt_info)->dr_aux.dr) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 773, __FUNCTION__), 0 : 0)), (stmt_info)->first_element)
) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 773, __FUNCTION__), 0 : 0))
773 && REDUC_GROUP_FIRST_ELEMENT (stmt_info))((void)(!(!(((void)(!(!(firstp)->dr_aux.dr) ? fancy_abort (
"/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 772, __FUNCTION__), 0 : 0)), (firstp)->first_element) &&
(((void)(!(!(stmt_info)->dr_aux.dr) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 773, __FUNCTION__), 0 : 0)), (stmt_info)->first_element)
) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 773, __FUNCTION__), 0 : 0))
;
774 REDUC_GROUP_SIZE (firstp)(((void)(!(!(firstp)->dr_aux.dr) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 774, __FUNCTION__), 0 : 0)), (firstp)->size)
= REDUC_GROUP_SIZE (stmt_info)(((void)(!(!(stmt_info)->dr_aux.dr) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 774, __FUNCTION__), 0 : 0)), (stmt_info)->size)
;
775 do
776 {
777 stmtp = STMT_VINFO_RELATED_STMT (stmt_info)(stmt_info)->related_stmt;
778 gcc_checking_assert (STMT_VINFO_DEF_TYPE (stmtp)((void)(!((stmtp)->def_type == (stmt_info)->def_type) ?
fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 779, __FUNCTION__), 0 : 0))
779 == STMT_VINFO_DEF_TYPE (stmt_info))((void)(!((stmtp)->def_type == (stmt_info)->def_type) ?
fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 779, __FUNCTION__), 0 : 0))
;
780 REDUC_GROUP_FIRST_ELEMENT (stmtp)(((void)(!(!(stmtp)->dr_aux.dr) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 780, __FUNCTION__), 0 : 0)), (stmtp)->first_element)
= firstp;
781 stmt_info = REDUC_GROUP_NEXT_ELEMENT (stmt_info)(((void)(!(!(stmt_info)->dr_aux.dr) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 781, __FUNCTION__), 0 : 0)), (stmt_info)->next_element)
;
782 if (stmt_info)
783 REDUC_GROUP_NEXT_ELEMENT (stmtp)(((void)(!(!(stmtp)->dr_aux.dr) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 783, __FUNCTION__), 0 : 0)), (stmtp)->next_element)
784 = STMT_VINFO_RELATED_STMT (stmt_info)(stmt_info)->related_stmt;
785 }
786 while (stmt_info);
787}
788
789/* Fixup scalar cycles that now have their stmts detected as patterns. */
790
791static void
792vect_fixup_scalar_cycles_with_patterns (loop_vec_info loop_vinfo)
793{
794 stmt_vec_info first;
795 unsigned i;
796
797 FOR_EACH_VEC_ELT (LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo), i, first)for (i = 0; ((loop_vinfo)->reduction_chains).iterate ((i),
&(first)); ++(i))
798 {
799 stmt_vec_info next = REDUC_GROUP_NEXT_ELEMENT (first)(((void)(!(!(first)->dr_aux.dr) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 799, __FUNCTION__), 0 : 0)), (first)->next_element)
;
800 while (next)
801 {
802 if ((STMT_VINFO_IN_PATTERN_P (next)(next)->in_pattern_p
803 != STMT_VINFO_IN_PATTERN_P (first)(first)->in_pattern_p)
804 || STMT_VINFO_REDUC_IDX (vect_stmt_to_vectorize (next))(vect_stmt_to_vectorize (next))->reduc_idx == -1)
805 break;
806 next = REDUC_GROUP_NEXT_ELEMENT (next)(((void)(!(!(next)->dr_aux.dr) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 806, __FUNCTION__), 0 : 0)), (next)->next_element)
;
807 }
808 /* If all reduction chain members are well-formed patterns adjust
809 the group to group the pattern stmts instead. */
810 if (! next
811 && STMT_VINFO_REDUC_IDX (vect_stmt_to_vectorize (first))(vect_stmt_to_vectorize (first))->reduc_idx != -1)
812 {
813 if (STMT_VINFO_IN_PATTERN_P (first)(first)->in_pattern_p)
814 {
815 vect_fixup_reduc_chain (first);
816 LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo)(loop_vinfo)->reduction_chains[i]
817 = STMT_VINFO_RELATED_STMT (first)(first)->related_stmt;
818 }
819 }
820 /* If not all stmt in the chain are patterns or if we failed
821 to update STMT_VINFO_REDUC_IDX dissolve the chain and handle
822 it as regular reduction instead. */
823 else
824 {
825 stmt_vec_info vinfo = first;
826 stmt_vec_info last = NULLnullptr;
827 while (vinfo)
828 {
829 next = REDUC_GROUP_NEXT_ELEMENT (vinfo)(((void)(!(!(vinfo)->dr_aux.dr) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 829, __FUNCTION__), 0 : 0)), (vinfo)->next_element)
;
830 REDUC_GROUP_FIRST_ELEMENT (vinfo)(((void)(!(!(vinfo)->dr_aux.dr) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 830, __FUNCTION__), 0 : 0)), (vinfo)->first_element)
= NULLnullptr;
831 REDUC_GROUP_NEXT_ELEMENT (vinfo)(((void)(!(!(vinfo)->dr_aux.dr) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 831, __FUNCTION__), 0 : 0)), (vinfo)->next_element)
= NULLnullptr;
832 last = vinfo;
833 vinfo = next;
834 }
835 STMT_VINFO_DEF_TYPE (vect_stmt_to_vectorize (first))(vect_stmt_to_vectorize (first))->def_type
836 = vect_internal_def;
837 loop_vinfo->reductions.safe_push (vect_stmt_to_vectorize (last));
838 LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo)(loop_vinfo)->reduction_chains.unordered_remove (i);
839 --i;
840 }
841 }
842}
843
844/* Function vect_get_loop_niters.
845
846 Determine how many iterations the loop is executed and place it
847 in NUMBER_OF_ITERATIONS. Place the number of latch iterations
848 in NUMBER_OF_ITERATIONSM1. Place the condition under which the
849 niter information holds in ASSUMPTIONS.
850
851 Return the loop exit condition. */
852
853
854static gcond *
855vect_get_loop_niters (class loop *loop, tree *assumptions,
856 tree *number_of_iterations, tree *number_of_iterationsm1)
857{
858 edge exit = single_exit (loop);
859 class tree_niter_desc niter_desc;
860 tree niter_assumptions, niter, may_be_zero;
861 gcond *cond = get_loop_exit_condition (loop);
862
863 *assumptions = boolean_true_nodeglobal_trees[TI_BOOLEAN_TRUE];
864 *number_of_iterationsm1 = chrec_dont_knowglobal_trees[TI_CHREC_DONT_KNOW];
865 *number_of_iterations = chrec_dont_knowglobal_trees[TI_CHREC_DONT_KNOW];
866 DUMP_VECT_SCOPE ("get_loop_niters")auto_dump_scope scope ("get_loop_niters", vect_location);
867
868 if (!exit)
869 return cond;
870
871 may_be_zero = NULL_TREE(tree) nullptr;
872 if (!number_of_iterations_exit_assumptions (loop, exit, &niter_desc, NULLnullptr)
873 || chrec_contains_undetermined (niter_desc.niter))
874 return cond;
875
876 niter_assumptions = niter_desc.assumptions;
877 may_be_zero = niter_desc.may_be_zero;
878 niter = niter_desc.niter;
879
880 if (may_be_zero && integer_zerop (may_be_zero))
881 may_be_zero = NULL_TREE(tree) nullptr;
882
883 if (may_be_zero)
884 {
885 if (COMPARISON_CLASS_P (may_be_zero)(tree_code_type_tmpl <0>::tree_code_type[(int) (((enum tree_code
) (may_be_zero)->base.code))] == tcc_comparison)
)
886 {
887 /* Try to combine may_be_zero with assumptions, this can simplify
888 computation of niter expression. */
889 if (niter_assumptions && !integer_nonzerop (niter_assumptions))
890 niter_assumptions = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,fold_build2_loc (((location_t) 0), TRUTH_AND_EXPR, global_trees
[TI_BOOLEAN_TYPE], niter_assumptions, fold_build1_loc (((location_t
) 0), TRUTH_NOT_EXPR, global_trees[TI_BOOLEAN_TYPE], may_be_zero
) )
891 niter_assumptions,fold_build2_loc (((location_t) 0), TRUTH_AND_EXPR, global_trees
[TI_BOOLEAN_TYPE], niter_assumptions, fold_build1_loc (((location_t
) 0), TRUTH_NOT_EXPR, global_trees[TI_BOOLEAN_TYPE], may_be_zero
) )
892 fold_build1 (TRUTH_NOT_EXPR,fold_build2_loc (((location_t) 0), TRUTH_AND_EXPR, global_trees
[TI_BOOLEAN_TYPE], niter_assumptions, fold_build1_loc (((location_t
) 0), TRUTH_NOT_EXPR, global_trees[TI_BOOLEAN_TYPE], may_be_zero
) )
893 boolean_type_node,fold_build2_loc (((location_t) 0), TRUTH_AND_EXPR, global_trees
[TI_BOOLEAN_TYPE], niter_assumptions, fold_build1_loc (((location_t
) 0), TRUTH_NOT_EXPR, global_trees[TI_BOOLEAN_TYPE], may_be_zero
) )
894 may_be_zero))fold_build2_loc (((location_t) 0), TRUTH_AND_EXPR, global_trees
[TI_BOOLEAN_TYPE], niter_assumptions, fold_build1_loc (((location_t
) 0), TRUTH_NOT_EXPR, global_trees[TI_BOOLEAN_TYPE], may_be_zero
) )
;
895 else
896 niter = fold_build3 (COND_EXPR, TREE_TYPE (niter), may_be_zero,fold_build3_loc (((location_t) 0), COND_EXPR, ((contains_struct_check
((niter), (TS_TYPED), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 896, __FUNCTION__))->typed.type), may_be_zero, build_int_cst
(((contains_struct_check ((niter), (TS_TYPED), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 897, __FUNCTION__))->typed.type), 0), rewrite_to_non_trapping_overflow
(niter) )
897 build_int_cst (TREE_TYPE (niter), 0),fold_build3_loc (((location_t) 0), COND_EXPR, ((contains_struct_check
((niter), (TS_TYPED), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 896, __FUNCTION__))->typed.type), may_be_zero, build_int_cst
(((contains_struct_check ((niter), (TS_TYPED), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 897, __FUNCTION__))->typed.type), 0), rewrite_to_non_trapping_overflow
(niter) )
898 rewrite_to_non_trapping_overflow (niter))fold_build3_loc (((location_t) 0), COND_EXPR, ((contains_struct_check
((niter), (TS_TYPED), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 896, __FUNCTION__))->typed.type), may_be_zero, build_int_cst
(((contains_struct_check ((niter), (TS_TYPED), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 897, __FUNCTION__))->typed.type), 0), rewrite_to_non_trapping_overflow
(niter) )
;
899
900 may_be_zero = NULL_TREE(tree) nullptr;
901 }
902 else if (integer_nonzerop (may_be_zero))
903 {
904 *number_of_iterationsm1 = build_int_cst (TREE_TYPE (niter)((contains_struct_check ((niter), (TS_TYPED), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 904, __FUNCTION__))->typed.type)
, 0);
905 *number_of_iterations = build_int_cst (TREE_TYPE (niter)((contains_struct_check ((niter), (TS_TYPED), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 905, __FUNCTION__))->typed.type)
, 1);
906 return cond;
907 }
908 else
909 return cond;
910 }
911
912 *assumptions = niter_assumptions;
913 *number_of_iterationsm1 = niter;
914
915 /* We want the number of loop header executions which is the number
916 of latch executions plus one.
917 ??? For UINT_MAX latch executions this number overflows to zero
918 for loops like do { n++; } while (n != 0); */
919 if (niter && !chrec_contains_undetermined (niter))
920 niter = fold_build2 (PLUS_EXPR, TREE_TYPE (niter), unshare_expr (niter),fold_build2_loc (((location_t) 0), PLUS_EXPR, ((contains_struct_check
((niter), (TS_TYPED), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 920, __FUNCTION__))->typed.type), unshare_expr (niter), build_int_cst
(((contains_struct_check ((niter), (TS_TYPED), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 921, __FUNCTION__))->typed.type), 1) )
921 build_int_cst (TREE_TYPE (niter), 1))fold_build2_loc (((location_t) 0), PLUS_EXPR, ((contains_struct_check
((niter), (TS_TYPED), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 920, __FUNCTION__))->typed.type), unshare_expr (niter), build_int_cst
(((contains_struct_check ((niter), (TS_TYPED), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 921, __FUNCTION__))->typed.type), 1) )
;
922 *number_of_iterations = niter;
923
924 return cond;
925}
926
927/* Function bb_in_loop_p
928
929 Used as predicate for dfs order traversal of the loop bbs. */
930
931static bool
932bb_in_loop_p (const_basic_block bb, const void *data)
933{
934 const class loop *const loop = (const class loop *)data;
935 if (flow_bb_inside_loop_p (loop, bb))
936 return true;
937 return false;
938}
939
940
941/* Create and initialize a new loop_vec_info struct for LOOP_IN, as well as
942 stmt_vec_info structs for all the stmts in LOOP_IN. */
943
944_loop_vec_info::_loop_vec_info (class loop *loop_in, vec_info_shared *shared)
945 : vec_info (vec_info::loop, shared),
946 loop (loop_in),
947 bbs (XCNEWVEC (basic_block, loop->num_nodes)((basic_block *) xcalloc ((loop->num_nodes), sizeof (basic_block
)))
),
948 num_itersm1 (NULL_TREE(tree) nullptr),
949 num_iters (NULL_TREE(tree) nullptr),
950 num_iters_unchanged (NULL_TREE(tree) nullptr),
951 num_iters_assumptions (NULL_TREE(tree) nullptr),
952 vector_costs (nullptr),
953 scalar_costs (nullptr),
954 th (0),
955 versioning_threshold (0),
956 vectorization_factor (0),
957 main_loop_edge (nullptr),
958 skip_main_loop_edge (nullptr),
959 skip_this_loop_edge (nullptr),
960 reusable_accumulators (),
961 suggested_unroll_factor (1),
962 max_vectorization_factor (0),
963 mask_skip_niters (NULL_TREE(tree) nullptr),
964 rgroup_compare_type (NULL_TREE(tree) nullptr),
965 simd_if_cond (NULL_TREE(tree) nullptr),
966 unaligned_dr (NULLnullptr),
967 peeling_for_alignment (0),
968 ptr_mask (0),
969 ivexpr_map (NULLnullptr),
970 scan_map (NULLnullptr),
971 slp_unrolling_factor (1),
972 inner_loop_cost_factor (param_vect_inner_loop_cost_factorglobal_options.x_param_vect_inner_loop_cost_factor),
973 vectorizable (false),
974 can_use_partial_vectors_p (param_vect_partial_vector_usageglobal_options.x_param_vect_partial_vector_usage != 0),
975 using_partial_vectors_p (false),
976 epil_using_partial_vectors_p (false),
977 partial_load_store_bias (0),
978 peeling_for_gaps (false),
979 peeling_for_niter (false),
980 no_data_dependencies (false),
981 has_mask_store (false),
982 scalar_loop_scaling (profile_probability::uninitialized ()),
983 scalar_loop (NULLnullptr),
984 orig_loop_info (NULLnullptr)
985{
986 /* CHECKME: We want to visit all BBs before their successors (except for
987 latch blocks, for which this assertion wouldn't hold). In the simple
988 case of the loop forms we allow, a dfs order of the BBs would the same
989 as reversed postorder traversal, so we are safe. */
990
991 unsigned int nbbs = dfs_enumerate_from (loop->header, 0, bb_in_loop_p,
992 bbs, loop->num_nodes, loop);
993 gcc_assert (nbbs == loop->num_nodes)((void)(!(nbbs == loop->num_nodes) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 993, __FUNCTION__), 0 : 0))
;
994
995 for (unsigned int i = 0; i < nbbs; i++)
996 {
997 basic_block bb = bbs[i];
998 gimple_stmt_iterator si;
999
1000 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
1001 {
1002 gimple *phi = gsi_stmt (si);
1003 gimple_set_uid (phi, 0);
1004 add_stmt (phi);
1005 }
1006
1007 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1008 {
1009 gimple *stmt = gsi_stmt (si);
1010 gimple_set_uid (stmt, 0);
1011 if (is_gimple_debug (stmt))
1012 continue;
1013 add_stmt (stmt);
1014 /* If .GOMP_SIMD_LANE call for the current loop has 3 arguments, the
1015 third argument is the #pragma omp simd if (x) condition, when 0,
1016 loop shouldn't be vectorized, when non-zero constant, it should
1017 be vectorized normally, otherwise versioned with vectorized loop
1018 done if the condition is non-zero at runtime. */
1019 if (loop_in->simduid
1020 && is_gimple_call (stmt)
1021 && gimple_call_internal_p (stmt)
1022 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE
1023 && gimple_call_num_args (stmt) >= 3
1024 && TREE_CODE (gimple_call_arg (stmt, 0))((enum tree_code) (gimple_call_arg (stmt, 0))->base.code) == SSA_NAME
1025 && (loop_in->simduid
1026 == SSA_NAME_VAR (gimple_call_arg (stmt, 0))((tree_check ((gimple_call_arg (stmt, 0)), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 1026, __FUNCTION__, (SSA_NAME)))->ssa_name.var == (tree)
nullptr || ((enum tree_code) ((gimple_call_arg (stmt, 0))->
ssa_name.var)->base.code) == IDENTIFIER_NODE ? (tree) nullptr
: (gimple_call_arg (stmt, 0))->ssa_name.var)
))
1027 {
1028 tree arg = gimple_call_arg (stmt, 2);
1029 if (integer_zerop (arg) || TREE_CODE (arg)((enum tree_code) (arg)->base.code) == SSA_NAME)
1030 simd_if_cond = arg;
1031 else
1032 gcc_assert (integer_nonzerop (arg))((void)(!(integer_nonzerop (arg)) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 1032, __FUNCTION__), 0 : 0))
;
1033 }
1034 }
1035 }
1036
1037 epilogue_vinfos.create (6);
1038}
1039
1040/* Free all levels of rgroup CONTROLS. */
1041
1042void
1043release_vec_loop_controls (vec<rgroup_controls> *controls)
1044{
1045 rgroup_controls *rgc;
1046 unsigned int i;
1047 FOR_EACH_VEC_ELT (*controls, i, rgc)for (i = 0; (*controls).iterate ((i), &(rgc)); ++(i))
1048 rgc->controls.release ();
1049 controls->release ();
1050}
1051
1052/* Free all memory used by the _loop_vec_info, as well as all the
1053 stmt_vec_info structs of all the stmts in the loop. */
1054
1055_loop_vec_info::~_loop_vec_info ()
1056{
1057 free (bbs);
1058
1059 release_vec_loop_controls (&masks);
1060 release_vec_loop_controls (&lens);
1061 delete ivexpr_map;
1062 delete scan_map;
1063 epilogue_vinfos.release ();
1064 delete scalar_costs;
1065 delete vector_costs;
1066
1067 /* When we release an epiloge vinfo that we do not intend to use
1068 avoid clearing AUX of the main loop which should continue to
1069 point to the main loop vinfo since otherwise we'll leak that. */
1070 if (loop->aux == this)
1071 loop->aux = NULLnullptr;
1072}
1073
1074/* Return an invariant or register for EXPR and emit necessary
1075 computations in the LOOP_VINFO loop preheader. */
1076
1077tree
1078cse_and_gimplify_to_preheader (loop_vec_info loop_vinfo, tree expr)
1079{
1080 if (is_gimple_reg (expr)
1081 || is_gimple_min_invariant (expr))
1082 return expr;
1083
1084 if (! loop_vinfo->ivexpr_map)
1085 loop_vinfo->ivexpr_map = new hash_map<tree_operand_hash, tree>;
1086 tree &cached = loop_vinfo->ivexpr_map->get_or_insert (expr);
1087 if (! cached)
1088 {
1089 gimple_seq stmts = NULLnullptr;
1090 cached = force_gimple_operand (unshare_expr (expr),
1091 &stmts, true, NULL_TREE(tree) nullptr);
1092 if (stmts)
1093 {
1094 edge e = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo)(loop_vinfo)->loop);
1095 gsi_insert_seq_on_edge_immediate (e, stmts);
1096 }
1097 }
1098 return cached;
1099}
1100
1101/* Return true if we can use CMP_TYPE as the comparison type to produce
1102 all masks required to mask LOOP_VINFO. */
1103
1104static bool
1105can_produce_all_loop_masks_p (loop_vec_info loop_vinfo, tree cmp_type)
1106{
1107 rgroup_controls *rgm;
1108 unsigned int i;
1109 FOR_EACH_VEC_ELT (LOOP_VINFO_MASKS (loop_vinfo), i, rgm)for (i = 0; ((loop_vinfo)->masks).iterate ((i), &(rgm)
); ++(i))
1110 if (rgm->type != NULL_TREE(tree) nullptr
1111 && !direct_internal_fn_supported_p (IFN_WHILE_ULT,
1112 cmp_type, rgm->type,
1113 OPTIMIZE_FOR_SPEED))
1114 return false;
1115 return true;
1116}
1117
1118/* Calculate the maximum number of scalars per iteration for every
1119 rgroup in LOOP_VINFO. */
1120
1121static unsigned int
1122vect_get_max_nscalars_per_iter (loop_vec_info loop_vinfo)
1123{
1124 unsigned int res = 1;
1125 unsigned int i;
1126 rgroup_controls *rgm;
1127 FOR_EACH_VEC_ELT (LOOP_VINFO_MASKS (loop_vinfo), i, rgm)for (i = 0; ((loop_vinfo)->masks).iterate ((i), &(rgm)
); ++(i))
1128 res = MAX (res, rgm->max_nscalars_per_iter)((res) > (rgm->max_nscalars_per_iter) ? (res) : (rgm->
max_nscalars_per_iter))
;
1129 return res;
1130}
1131
1132/* Calculate the minimum precision necessary to represent:
1133
1134 MAX_NITERS * FACTOR
1135
1136 as an unsigned integer, where MAX_NITERS is the maximum number of
1137 loop header iterations for the original scalar form of LOOP_VINFO. */
1138
1139static unsigned
1140vect_min_prec_for_max_niters (loop_vec_info loop_vinfo, unsigned int factor)
1141{
1142 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo)(loop_vinfo)->loop;
1143
1144 /* Get the maximum number of iterations that is representable
1145 in the counter type. */
1146 tree ni_type = TREE_TYPE (LOOP_VINFO_NITERSM1 (loop_vinfo))((contains_struct_check (((loop_vinfo)->num_itersm1), (TS_TYPED
), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 1146, __FUNCTION__))->typed.type)
;
1147 widest_int max_ni = wi::to_widest (TYPE_MAX_VALUE (ni_type)((tree_check5 ((ni_type), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 1147, __FUNCTION__, (INTEGER_TYPE), (ENUMERAL_TYPE), (BOOLEAN_TYPE
), (REAL_TYPE), (FIXED_POINT_TYPE)))->type_non_common.maxval
)
) + 1;
1148
1149 /* Get a more refined estimate for the number of iterations. */
1150 widest_int max_back_edges;
1151 if (max_loop_iterations (loop, &max_back_edges))
1152 max_ni = wi::smin (max_ni, max_back_edges + 1);
1153
1154 /* Work out how many bits we need to represent the limit. */
1155 return wi::min_precision (max_ni * factor, UNSIGNED);
1156}
1157
1158/* True if the loop needs peeling or partial vectors when vectorized. */
1159
1160static bool
1161vect_need_peeling_or_partial_vectors_p (loop_vec_info loop_vinfo)
1162{
1163 unsigned HOST_WIDE_INTlong const_vf;
1164 HOST_WIDE_INTlong max_niter
1165 = likely_max_stmt_executions_int (LOOP_VINFO_LOOP (loop_vinfo)(loop_vinfo)->loop);
1166
1167 unsigned th = LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo)(loop_vinfo)->th;
1168 if (!th && LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo)(loop_vinfo)->orig_loop_info)
1169 th = LOOP_VINFO_COST_MODEL_THRESHOLD (LOOP_VINFO_ORIG_LOOP_INFO((loop_vinfo)->orig_loop_info)->th
1170 (loop_vinfo))((loop_vinfo)->orig_loop_info)->th;
1171
1172 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)(tree_fits_shwi_p ((loop_vinfo)->num_iters) && tree_to_shwi
((loop_vinfo)->num_iters) > 0)
1173 && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)(loop_vinfo)->peeling_for_alignment >= 0)
1174 {
1175 /* Work out the (constant) number of iterations that need to be
1176 peeled for reasons other than niters. */
1177 unsigned int peel_niter = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)(loop_vinfo)->peeling_for_alignment;
1178 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)(loop_vinfo)->peeling_for_gaps)
1179 peel_niter += 1;
1180 if (!multiple_p (LOOP_VINFO_INT_NITERS (loop_vinfo)(((unsigned long) (*tree_int_cst_elt_check (((loop_vinfo)->
num_iters), (0), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 1180, __FUNCTION__))))
- peel_niter,
1181 LOOP_VINFO_VECT_FACTOR (loop_vinfo)(loop_vinfo)->vectorization_factor))
1182 return true;
1183 }
1184 else if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)(loop_vinfo)->peeling_for_alignment
1185 /* ??? When peeling for gaps but not alignment, we could
1186 try to check whether the (variable) niters is known to be
1187 VF * N + 1. That's something of a niche case though. */
1188 || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)(loop_vinfo)->peeling_for_gaps
1189 || !LOOP_VINFO_VECT_FACTOR (loop_vinfo)(loop_vinfo)->vectorization_factor.is_constant (&const_vf)
1190 || ((tree_ctz (LOOP_VINFO_NITERS (loop_vinfo)(loop_vinfo)->num_iters)
1191 < (unsigned) exact_log2 (const_vf))
1192 /* In case of versioning, check if the maximum number of
1193 iterations is greater than th. If they are identical,
1194 the epilogue is unnecessary. */
1195 && (!LOOP_REQUIRES_VERSIONING (loop_vinfo)(((loop_vinfo)->may_misalign_stmts.length () > 0) || ((
loop_vinfo)->comp_alias_ddrs.length () > 0 || (loop_vinfo
)->check_unequal_addrs.length () > 0 || (loop_vinfo)->
lower_bounds.length () > 0) || ((loop_vinfo)->num_iters_assumptions
) || ((loop_vinfo)->simd_if_cond))
1196 || ((unsigned HOST_WIDE_INTlong) max_niter
1197 > (th / const_vf) * const_vf))))
1198 return true;
1199
1200 return false;
1201}
1202
1203/* Each statement in LOOP_VINFO can be masked where necessary. Check
1204 whether we can actually generate the masks required. Return true if so,
1205 storing the type of the scalar IV in LOOP_VINFO_RGROUP_COMPARE_TYPE. */
1206
1207static bool
1208vect_verify_full_masking (loop_vec_info loop_vinfo)
1209{
1210 unsigned int min_ni_width;
1211 unsigned int max_nscalars_per_iter
1212 = vect_get_max_nscalars_per_iter (loop_vinfo);
1213
1214 /* Use a normal loop if there are no statements that need masking.
1215 This only happens in rare degenerate cases: it means that the loop
1216 has no loads, no stores, and no live-out values. */
1217 if (LOOP_VINFO_MASKS (loop_vinfo)(loop_vinfo)->masks.is_empty ())
1218 return false;
1219
1220 /* Work out how many bits we need to represent the limit. */
1221 min_ni_width
1222 = vect_min_prec_for_max_niters (loop_vinfo, max_nscalars_per_iter);
1223
1224 /* Find a scalar mode for which WHILE_ULT is supported. */
1225 opt_scalar_int_mode cmp_mode_iter;
1226 tree cmp_type = NULL_TREE(tree) nullptr;
1227 tree iv_type = NULL_TREE(tree) nullptr;
1228 widest_int iv_limit = vect_iv_limit_for_partial_vectors (loop_vinfo);
1229 unsigned int iv_precision = UINT_MAX(2147483647 *2U +1U);
1230
1231 if (iv_limit != -1)
1232 iv_precision = wi::min_precision (iv_limit * max_nscalars_per_iter,
1233 UNSIGNED);
1234
1235 FOR_EACH_MODE_IN_CLASS (cmp_mode_iter, MODE_INT)for (mode_iterator::start (&(cmp_mode_iter), MODE_INT); mode_iterator
::iterate_p (&(cmp_mode_iter)); mode_iterator::get_next (
&(cmp_mode_iter)))
1236 {
1237 unsigned int cmp_bits = GET_MODE_BITSIZE (cmp_mode_iter.require ());
1238 if (cmp_bits >= min_ni_width
1239 && targetm.scalar_mode_supported_p (cmp_mode_iter.require ()))
1240 {
1241 tree this_type = build_nonstandard_integer_type (cmp_bits, true);
1242 if (this_type
1243 && can_produce_all_loop_masks_p (loop_vinfo, this_type))
1244 {
1245 /* Although we could stop as soon as we find a valid mode,
1246 there are at least two reasons why that's not always the
1247 best choice:
1248
1249 - An IV that's Pmode or wider is more likely to be reusable
1250 in address calculations than an IV that's narrower than
1251 Pmode.
1252
1253 - Doing the comparison in IV_PRECISION or wider allows
1254 a natural 0-based IV, whereas using a narrower comparison
1255 type requires mitigations against wrap-around.
1256
1257 Conversely, if the IV limit is variable, doing the comparison
1258 in a wider type than the original type can introduce
1259 unnecessary extensions, so picking the widest valid mode
1260 is not always a good choice either.
1261
1262 Here we prefer the first IV type that's Pmode or wider,
1263 and the first comparison type that's IV_PRECISION or wider.
1264 (The comparison type must be no wider than the IV type,
1265 to avoid extensions in the vector loop.)
1266
1267 ??? We might want to try continuing beyond Pmode for ILP32
1268 targets if CMP_BITS < IV_PRECISION. */
1269 iv_type = this_type;
1270 if (!cmp_type || iv_precision > TYPE_PRECISION (cmp_type)((tree_class_check ((cmp_type), (tcc_type), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 1270, __FUNCTION__))->type_common.precision)
)
1271 cmp_type = this_type;
1272 if (cmp_bits >= GET_MODE_BITSIZE (Pmode(global_options.x_ix86_pmode == PMODE_DI ? (scalar_int_mode (
(scalar_int_mode::from_int) E_DImode)) : (scalar_int_mode ((scalar_int_mode
::from_int) E_SImode)))
))
1273 break;
1274 }
1275 }
1276 }
1277
1278 if (!cmp_type)
1279 return false;
1280
1281 LOOP_VINFO_RGROUP_COMPARE_TYPE (loop_vinfo)(loop_vinfo)->rgroup_compare_type = cmp_type;
1282 LOOP_VINFO_RGROUP_IV_TYPE (loop_vinfo)(loop_vinfo)->rgroup_iv_type = iv_type;
1283 return true;
1284}
1285
1286/* Check whether we can use vector access with length based on precison
1287 comparison. So far, to keep it simple, we only allow the case that the
1288 precision of the target supported length is larger than the precision
1289 required by loop niters. */
1290
1291static bool
1292vect_verify_loop_lens (loop_vec_info loop_vinfo)
1293{
1294 if (LOOP_VINFO_LENS (loop_vinfo)(loop_vinfo)->lens.is_empty ())
1295 return false;
1296
1297 machine_mode len_load_mode = get_len_load_store_mode
1298 (loop_vinfo->vector_mode, true).require ();
1299 machine_mode len_store_mode = get_len_load_store_mode
1300 (loop_vinfo->vector_mode, false).require ();
1301
1302 signed char partial_load_bias = internal_len_load_store_bias
1303 (IFN_LEN_LOAD, len_load_mode);
1304
1305 signed char partial_store_bias = internal_len_load_store_bias
1306 (IFN_LEN_STORE, len_store_mode);
1307
1308 gcc_assert (partial_load_bias == partial_store_bias)((void)(!(partial_load_bias == partial_store_bias) ? fancy_abort
("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 1308, __FUNCTION__), 0 : 0))
;
1309
1310 if (partial_load_bias == VECT_PARTIAL_BIAS_UNSUPPORTED127)
1311 return false;
1312
1313 /* If the backend requires a bias of -1 for LEN_LOAD, we must not emit
1314 len_loads with a length of zero. In order to avoid that we prohibit
1315 more than one loop length here. */
1316 if (partial_load_bias == -1
1317 && LOOP_VINFO_LENS (loop_vinfo)(loop_vinfo)->lens.length () > 1)
1318 return false;
1319
1320 LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo)(loop_vinfo)->partial_load_store_bias = partial_load_bias;
1321
1322 unsigned int max_nitems_per_iter = 1;
1323 unsigned int i;
1324 rgroup_controls *rgl;
1325 /* Find the maximum number of items per iteration for every rgroup. */
1326 FOR_EACH_VEC_ELT (LOOP_VINFO_LENS (loop_vinfo), i, rgl)for (i = 0; ((loop_vinfo)->lens).iterate ((i), &(rgl))
; ++(i))
1327 {
1328 unsigned nitems_per_iter = rgl->max_nscalars_per_iter * rgl->factor;
1329 max_nitems_per_iter = MAX (max_nitems_per_iter, nitems_per_iter)((max_nitems_per_iter) > (nitems_per_iter) ? (max_nitems_per_iter
) : (nitems_per_iter))
;
1330 }
1331
1332 /* Work out how many bits we need to represent the length limit. */
1333 unsigned int min_ni_prec
1334 = vect_min_prec_for_max_niters (loop_vinfo, max_nitems_per_iter);
1335
1336 /* Now use the maximum of below precisions for one suitable IV type:
1337 - the IV's natural precision
1338 - the precision needed to hold: the maximum number of scalar
1339 iterations multiplied by the scale factor (min_ni_prec above)
1340 - the Pmode precision
1341
1342 If min_ni_prec is less than the precision of the current niters,
1343 we perfer to still use the niters type. Prefer to use Pmode and
1344 wider IV to avoid narrow conversions. */
1345
1346 unsigned int ni_prec
1347 = TYPE_PRECISION (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)))((tree_class_check ((((contains_struct_check (((loop_vinfo)->
num_iters), (TS_TYPED), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 1347, __FUNCTION__))->typed.type)), (tcc_type), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 1347, __FUNCTION__))->type_common.precision)
;
1348 min_ni_prec = MAX (min_ni_prec, ni_prec)((min_ni_prec) > (ni_prec) ? (min_ni_prec) : (ni_prec));
1349 min_ni_prec = MAX (min_ni_prec, GET_MODE_BITSIZE (Pmode))((min_ni_prec) > (GET_MODE_BITSIZE ((global_options.x_ix86_pmode
== PMODE_DI ? (scalar_int_mode ((scalar_int_mode::from_int) E_DImode
)) : (scalar_int_mode ((scalar_int_mode::from_int) E_SImode))
))) ? (min_ni_prec) : (GET_MODE_BITSIZE ((global_options.x_ix86_pmode
== PMODE_DI ? (scalar_int_mode ((scalar_int_mode::from_int) E_DImode
)) : (scalar_int_mode ((scalar_int_mode::from_int) E_SImode))
))))
;
1350
1351 tree iv_type = NULL_TREE(tree) nullptr;
1352 opt_scalar_int_mode tmode_iter;
1353 FOR_EACH_MODE_IN_CLASS (tmode_iter, MODE_INT)for (mode_iterator::start (&(tmode_iter), MODE_INT); mode_iterator
::iterate_p (&(tmode_iter)); mode_iterator::get_next (&
(tmode_iter)))
1354 {
1355 scalar_mode tmode = tmode_iter.require ();
1356 unsigned int tbits = GET_MODE_BITSIZE (tmode);
1357
1358 /* ??? Do we really want to construct one IV whose precision exceeds
1359 BITS_PER_WORD? */
1360 if (tbits > BITS_PER_WORD((8) * (((global_options.x_ix86_isa_flags & (1UL <<
1)) != 0) ? 8 : 4))
)
1361 break;
1362
1363 /* Find the first available standard integral type. */
1364 if (tbits >= min_ni_prec && targetm.scalar_mode_supported_p (tmode))
1365 {
1366 iv_type = build_nonstandard_integer_type (tbits, true);
1367 break;
1368 }
1369 }
1370
1371 if (!iv_type)
1372 {
1373 if (dump_enabled_p ())
1374 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1375 "can't vectorize with length-based partial vectors"
1376 " because there is no suitable iv type.\n");
1377 return false;
1378 }
1379
1380 LOOP_VINFO_RGROUP_COMPARE_TYPE (loop_vinfo)(loop_vinfo)->rgroup_compare_type = iv_type;
1381 LOOP_VINFO_RGROUP_IV_TYPE (loop_vinfo)(loop_vinfo)->rgroup_iv_type = iv_type;
1382
1383 return true;
1384}
1385
1386/* Calculate the cost of one scalar iteration of the loop. */
1387static void
1388vect_compute_single_scalar_iteration_cost (loop_vec_info loop_vinfo)
1389{
1390 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo)(loop_vinfo)->loop;
1391 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo)(loop_vinfo)->bbs;
1392 int nbbs = loop->num_nodes, factor;
1393 int innerloop_iters, i;
1394
1395 DUMP_VECT_SCOPE ("vect_compute_single_scalar_iteration_cost")auto_dump_scope scope ("vect_compute_single_scalar_iteration_cost"
, vect_location)
;
1396
1397 /* Gather costs for statements in the scalar loop. */
1398
1399 /* FORNOW. */
1400 innerloop_iters = 1;
1401 if (loop->inner)
1402 innerloop_iters = LOOP_VINFO_INNER_LOOP_COST_FACTOR (loop_vinfo)(loop_vinfo)->inner_loop_cost_factor;
1403
1404 for (i = 0; i < nbbs; i++)
1405 {
1406 gimple_stmt_iterator si;
1407 basic_block bb = bbs[i];
1408
1409 if (bb->loop_father == loop->inner)
1410 factor = innerloop_iters;
1411 else
1412 factor = 1;
1413
1414 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1415 {
1416 gimple *stmt = gsi_stmt (si);
1417 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (stmt);
1418
1419 if (!is_gimple_assign (stmt) && !is_gimple_call (stmt))
1420 continue;
1421
1422 /* Skip stmts that are not vectorized inside the loop. */
1423 stmt_vec_info vstmt_info = vect_stmt_to_vectorize (stmt_info);
1424 if (!STMT_VINFO_RELEVANT_P (vstmt_info)((vstmt_info)->relevant != vect_unused_in_scope)
1425 && (!STMT_VINFO_LIVE_P (vstmt_info)(vstmt_info)->live
1426 || !VECTORIZABLE_CYCLE_DEF((((vstmt_info)->def_type) == vect_reduction_def) || (((vstmt_info
)->def_type) == vect_double_reduction_def) || (((vstmt_info
)->def_type) == vect_nested_cycle))
1427 (STMT_VINFO_DEF_TYPE (vstmt_info))((((vstmt_info)->def_type) == vect_reduction_def) || (((vstmt_info
)->def_type) == vect_double_reduction_def) || (((vstmt_info
)->def_type) == vect_nested_cycle))
))
1428 continue;
1429
1430 vect_cost_for_stmt kind;
1431 if (STMT_VINFO_DATA_REF (stmt_info)((stmt_info)->dr_aux.dr + 0))
1432 {
1433 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info))(((stmt_info)->dr_aux.dr + 0))->is_read)
1434 kind = scalar_load;
1435 else
1436 kind = scalar_store;
1437 }
1438 else if (vect_nop_conversion_p (stmt_info))
1439 continue;
1440 else
1441 kind = scalar_stmt;
1442
1443 /* We are using vect_prologue here to avoid scaling twice
1444 by the inner loop factor. */
1445 record_stmt_cost (&LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo)(loop_vinfo)->scalar_cost_vec,
1446 factor, kind, stmt_info, 0, vect_prologue);
1447 }
1448 }
1449
1450 /* Now accumulate cost. */
1451 loop_vinfo->scalar_costs = init_cost (loop_vinfo, true);
1452 add_stmt_costs (loop_vinfo->scalar_costs,
1453 &LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo)(loop_vinfo)->scalar_cost_vec);
1454 loop_vinfo->scalar_costs->finish_cost (nullptr);
1455}
1456
1457
1458/* Function vect_analyze_loop_form.
1459
1460 Verify that certain CFG restrictions hold, including:
1461 - the loop has a pre-header
1462 - the loop has a single entry and exit
1463 - the loop exit condition is simple enough
1464 - the number of iterations can be analyzed, i.e, a countable loop. The
1465 niter could be analyzed under some assumptions. */
1466
1467opt_result
1468vect_analyze_loop_form (class loop *loop, vect_loop_form_info *info)
1469{
1470 DUMP_VECT_SCOPE ("vect_analyze_loop_form")auto_dump_scope scope ("vect_analyze_loop_form", vect_location
)
;
1471
1472 /* Different restrictions apply when we are considering an inner-most loop,
1473 vs. an outer (nested) loop.
1474 (FORNOW. May want to relax some of these restrictions in the future). */
1475
1476 info->inner_loop_cond = NULLnullptr;
1477 if (!loop->inner)
1478 {
1479 /* Inner-most loop. We currently require that the number of BBs is
1480 exactly 2 (the header and latch). Vectorizable inner-most loops
1481 look like this:
1482
1483 (pre-header)
1484 |
1485 header <--------+
1486 | | |
1487 | +--> latch --+
1488 |
1489 (exit-bb) */
1490
1491 if (loop->num_nodes != 2)
1492 return opt_result::failure_at (vect_location,
1493 "not vectorized:"
1494 " control flow in loop.\n");
1495
1496 if (empty_block_p (loop->header))
1497 return opt_result::failure_at (vect_location,
1498 "not vectorized: empty loop.\n");
1499 }
1500 else
1501 {
1502 class loop *innerloop = loop->inner;
1503 edge entryedge;
1504
1505 /* Nested loop. We currently require that the loop is doubly-nested,
1506 contains a single inner loop, and the number of BBs is exactly 5.
1507 Vectorizable outer-loops look like this:
1508
1509 (pre-header)
1510 |
1511 header <---+
1512 | |
1513 inner-loop |
1514 | |
1515 tail ------+
1516 |
1517 (exit-bb)
1518
1519 The inner-loop has the properties expected of inner-most loops
1520 as described above. */
1521
1522 if ((loop->inner)->inner || (loop->inner)->next)
1523 return opt_result::failure_at (vect_location,
1524 "not vectorized:"
1525 " multiple nested loops.\n");
1526
1527 if (loop->num_nodes != 5)
1528 return opt_result::failure_at (vect_location,
1529 "not vectorized:"
1530 " control flow in loop.\n");
1531
1532 entryedge = loop_preheader_edge (innerloop);
1533 if (entryedge->src != loop->header
1534 || !single_exit (innerloop)
1535 || single_exit (innerloop)->dest != EDGE_PRED (loop->latch, 0)(*(loop->latch)->preds)[(0)]->src)
1536 return opt_result::failure_at (vect_location,
1537 "not vectorized:"
1538 " unsupported outerloop form.\n");
1539
1540 /* Analyze the inner-loop. */
1541 vect_loop_form_info inner;
1542 opt_result res = vect_analyze_loop_form (loop->inner, &inner);
1543 if (!res)
1544 {
1545 if (dump_enabled_p ())
1546 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1547 "not vectorized: Bad inner loop.\n");
1548 return res;
1549 }
1550
1551 /* Don't support analyzing niter under assumptions for inner
1552 loop. */
1553 if (!integer_onep (inner.assumptions))
1554 return opt_result::failure_at (vect_location,
1555 "not vectorized: Bad inner loop.\n");
1556
1557 if (!expr_invariant_in_loop_p (loop, inner.number_of_iterations))
1558 return opt_result::failure_at (vect_location,
1559 "not vectorized: inner-loop count not"
1560 " invariant.\n");
1561
1562 if (dump_enabled_p ())
1563 dump_printf_loc (MSG_NOTE, vect_location,
1564 "Considering outer-loop vectorization.\n");
1565 info->inner_loop_cond = inner.loop_cond;
1566 }
1567
1568 if (!single_exit (loop))
1569 return opt_result::failure_at (vect_location,
1570 "not vectorized: multiple exits.\n");
1571 if (EDGE_COUNT (loop->header->preds)vec_safe_length (loop->header->preds) != 2)
1572 return opt_result::failure_at (vect_location,
1573 "not vectorized:"
1574 " too many incoming edges.\n");
1575
1576 /* We assume that the loop exit condition is at the end of the loop. i.e,
1577 that the loop is represented as a do-while (with a proper if-guard
1578 before the loop if needed), where the loop header contains all the
1579 executable statements, and the latch is empty. */
1580 if (!empty_block_p (loop->latch)
1581 || !gimple_seq_empty_p (phi_nodes (loop->latch)))
1582 return opt_result::failure_at (vect_location,
1583 "not vectorized: latch block not empty.\n");
1584
1585 /* Make sure the exit is not abnormal. */
1586 edge e = single_exit (loop);
1587 if (e->flags & EDGE_ABNORMAL)
1588 return opt_result::failure_at (vect_location,
1589 "not vectorized:"
1590 " abnormal loop exit edge.\n");
1591
1592 info->loop_cond
1593 = vect_get_loop_niters (loop, &info->assumptions,
1594 &info->number_of_iterations,
1595 &info->number_of_iterationsm1);
1596 if (!info->loop_cond)
1597 return opt_result::failure_at
1598 (vect_location,
1599 "not vectorized: complicated exit condition.\n");
1600
1601 if (integer_zerop (info->assumptions)
1602 || !info->number_of_iterations
1603 || chrec_contains_undetermined (info->number_of_iterations))
1604 return opt_result::failure_at
1605 (info->loop_cond,
1606 "not vectorized: number of iterations cannot be computed.\n");
1607
1608 if (integer_zerop (info->number_of_iterations))
1609 return opt_result::failure_at
1610 (info->loop_cond,
1611 "not vectorized: number of iterations = 0.\n");
1612
1613 if (!(tree_fits_shwi_p (info->number_of_iterations)
1614 && tree_to_shwi (info->number_of_iterations) > 0))
1615 {
1616 if (dump_enabled_p ())
1617 {
1618 dump_printf_loc (MSG_NOTE, vect_location,
1619 "Symbolic number of iterations is ");
1620 dump_generic_expr (MSG_NOTE, TDF_DETAILS, info->number_of_iterations);
1621 dump_printf (MSG_NOTE, "\n");
1622 }
1623 }
1624
1625 return opt_result::success ();
1626}
1627
1628/* Create a loop_vec_info for LOOP with SHARED and the
1629 vect_analyze_loop_form result. */
1630
1631loop_vec_info
1632vect_create_loop_vinfo (class loop *loop, vec_info_shared *shared,
1633 const vect_loop_form_info *info,
1634 loop_vec_info main_loop_info)
1635{
1636 loop_vec_info loop_vinfo = new _loop_vec_info (loop, shared);
1637 LOOP_VINFO_NITERSM1 (loop_vinfo)(loop_vinfo)->num_itersm1 = info->number_of_iterationsm1;
1638 LOOP_VINFO_NITERS (loop_vinfo)(loop_vinfo)->num_iters = info->number_of_iterations;
1639 LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo)(loop_vinfo)->num_iters_unchanged = info->number_of_iterations;
1640 LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo)(loop_vinfo)->orig_loop_info = main_loop_info;
1641 /* Also record the assumptions for versioning. */
1642 if (!integer_onep (info->assumptions) && !main_loop_info)
1643 LOOP_VINFO_NITERS_ASSUMPTIONS (loop_vinfo)(loop_vinfo)->num_iters_assumptions = info->assumptions;
1644
1645 stmt_vec_info loop_cond_info = loop_vinfo->lookup_stmt (info->loop_cond);
1646 STMT_VINFO_TYPE (loop_cond_info)(loop_cond_info)->type = loop_exit_ctrl_vec_info_type;
1647 if (info->inner_loop_cond)
1648 {
1649 stmt_vec_info inner_loop_cond_info
1650 = loop_vinfo->lookup_stmt (info->inner_loop_cond);
1651 STMT_VINFO_TYPE (inner_loop_cond_info)(inner_loop_cond_info)->type = loop_exit_ctrl_vec_info_type;
1652 /* If we have an estimate on the number of iterations of the inner
1653 loop use that to limit the scale for costing, otherwise use
1654 --param vect-inner-loop-cost-factor literally. */
1655 widest_int nit;
1656 if (estimated_stmt_executions (loop->inner, &nit))
1657 LOOP_VINFO_INNER_LOOP_COST_FACTOR (loop_vinfo)(loop_vinfo)->inner_loop_cost_factor
1658 = wi::smin (nit, param_vect_inner_loop_cost_factorglobal_options.x_param_vect_inner_loop_cost_factor).to_uhwi ();
1659 }
1660
1661 return loop_vinfo;
1662}
1663
1664
1665
1666/* Scan the loop stmts and dependent on whether there are any (non-)SLP
1667 statements update the vectorization factor. */
1668
1669static void
1670vect_update_vf_for_slp (loop_vec_info loop_vinfo)
1671{
1672 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo)(loop_vinfo)->loop;
1673 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo)(loop_vinfo)->bbs;
1674 int nbbs = loop->num_nodes;
1675 poly_uint64 vectorization_factor;
1676 int i;
1677
1678 DUMP_VECT_SCOPE ("vect_update_vf_for_slp")auto_dump_scope scope ("vect_update_vf_for_slp", vect_location
)
;
1679
1680 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo)(loop_vinfo)->vectorization_factor;
1681 gcc_assert (known_ne (vectorization_factor, 0U))((void)(!((!maybe_eq (vectorization_factor, 0U))) ? fancy_abort
("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 1681, __FUNCTION__), 0 : 0))
;
1682
1683 /* If all the stmts in the loop can be SLPed, we perform only SLP, and
1684 vectorization factor of the loop is the unrolling factor required by
1685 the SLP instances. If that unrolling factor is 1, we say, that we
1686 perform pure SLP on loop - cross iteration parallelism is not
1687 exploited. */
1688 bool only_slp_in_loop = true;
1689 for (i = 0; i < nbbs; i++)
1690 {
1691 basic_block bb = bbs[i];
1692 for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si);
1693 gsi_next (&si))
1694 {
1695 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (si.phi ());
1696 if (!stmt_info)
1697 continue;
1698 if ((STMT_VINFO_RELEVANT_P (stmt_info)((stmt_info)->relevant != vect_unused_in_scope)
1699 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info))((((stmt_info)->def_type) == vect_reduction_def) || (((stmt_info
)->def_type) == vect_double_reduction_def) || (((stmt_info
)->def_type) == vect_nested_cycle))
)
1700 && !PURE_SLP_STMT (stmt_info)((stmt_info)->slp_type == pure_slp))
1701 /* STMT needs both SLP and loop-based vectorization. */
1702 only_slp_in_loop = false;
1703 }
1704 for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si);
1705 gsi_next (&si))
1706 {
1707 if (is_gimple_debug (gsi_stmt (si)))
1708 continue;
1709 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
1710 stmt_info = vect_stmt_to_vectorize (stmt_info);
1711 if ((STMT_VINFO_RELEVANT_P (stmt_info)((stmt_info)->relevant != vect_unused_in_scope)
1712 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info))((((stmt_info)->def_type) == vect_reduction_def) || (((stmt_info
)->def_type) == vect_double_reduction_def) || (((stmt_info
)->def_type) == vect_nested_cycle))
)
1713 && !PURE_SLP_STMT (stmt_info)((stmt_info)->slp_type == pure_slp))
1714 /* STMT needs both SLP and loop-based vectorization. */
1715 only_slp_in_loop = false;
1716 }
1717 }
1718
1719 if (only_slp_in_loop)
1720 {
1721 if (dump_enabled_p ())
1722 dump_printf_loc (MSG_NOTE, vect_location,
1723 "Loop contains only SLP stmts\n");
1724 vectorization_factor = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo)(loop_vinfo)->slp_unrolling_factor;
1725 }
1726 else
1727 {
1728 if (dump_enabled_p ())
1729 dump_printf_loc (MSG_NOTE, vect_location,
1730 "Loop contains SLP and non-SLP stmts\n");
1731 /* Both the vectorization factor and unroll factor have the form
1732 GET_MODE_SIZE (loop_vinfo->vector_mode) * X for some rational X,
1733 so they must have a common multiple. */
1734 vectorization_factor
1735 = force_common_multiple (vectorization_factor,
1736 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo)(loop_vinfo)->slp_unrolling_factor);
1737 }
1738
1739 LOOP_VINFO_VECT_FACTOR (loop_vinfo)(loop_vinfo)->vectorization_factor = vectorization_factor;
1740 if (dump_enabled_p ())
1741 {
1742 dump_printf_loc (MSG_NOTE, vect_location,
1743 "Updating vectorization factor to ");
1744 dump_dec (MSG_NOTE, vectorization_factor);
1745 dump_printf (MSG_NOTE, ".\n");
1746 }
1747}
1748
1749/* Return true if STMT_INFO describes a double reduction phi and if
1750 the other phi in the reduction is also relevant for vectorization.
1751 This rejects cases such as:
1752
1753 outer1:
1754 x_1 = PHI <x_3(outer2), ...>;
1755 ...
1756
1757 inner:
1758 x_2 = ...;
1759 ...
1760
1761 outer2:
1762 x_3 = PHI <x_2(inner)>;
1763
1764 if nothing in x_2 or elsewhere makes x_1 relevant. */
1765
1766static bool
1767vect_active_double_reduction_p (stmt_vec_info stmt_info)
1768{
1769 if (STMT_VINFO_DEF_TYPE (stmt_info)(stmt_info)->def_type != vect_double_reduction_def)
1770 return false;
1771
1772 return STMT_VINFO_RELEVANT_P (STMT_VINFO_REDUC_DEF (stmt_info))(((stmt_info)->reduc_def)->relevant != vect_unused_in_scope
)
;
1773}
1774
1775/* Function vect_analyze_loop_operations.
1776
1777 Scan the loop stmts and make sure they are all vectorizable. */
1778
1779static opt_result
1780vect_analyze_loop_operations (loop_vec_info loop_vinfo)
1781{
1782 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo)(loop_vinfo)->loop;
1783 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo)(loop_vinfo)->bbs;
1784 int nbbs = loop->num_nodes;
1785 int i;
1786 stmt_vec_info stmt_info;
1787 bool need_to_vectorize = false;
1788 bool ok;
1789
1790 DUMP_VECT_SCOPE ("vect_analyze_loop_operations")auto_dump_scope scope ("vect_analyze_loop_operations", vect_location
)
;
1791
1792 auto_vec<stmt_info_for_cost> cost_vec;
1793
1794 for (i = 0; i < nbbs; i++)
1795 {
1796 basic_block bb = bbs[i];
1797
1798 for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si);
1799 gsi_next (&si))
1800 {
1801 gphi *phi = si.phi ();
1802 ok = true;
1803
1804 stmt_info = loop_vinfo->lookup_stmt (phi);
1805 if (dump_enabled_p ())
1806 dump_printf_loc (MSG_NOTE, vect_location, "examining phi: %G",
1807 (gimple *) phi);
1808 if (virtual_operand_p (gimple_phi_result (phi)))
1809 continue;
1810
1811 /* Inner-loop loop-closed exit phi in outer-loop vectorization
1812 (i.e., a phi in the tail of the outer-loop). */
1813 if (! is_loop_header_bb_p (bb))
1814 {
1815 /* FORNOW: we currently don't support the case that these phis
1816 are not used in the outerloop (unless it is double reduction,
1817 i.e., this phi is vect_reduction_def), cause this case
1818 requires to actually do something here. */
1819 if (STMT_VINFO_LIVE_P (stmt_info)(stmt_info)->live
1820 && !vect_active_double_reduction_p (stmt_info))
1821 return opt_result::failure_at (phi,
1822 "Unsupported loop-closed phi"
1823 " in outer-loop.\n");
1824
1825 /* If PHI is used in the outer loop, we check that its operand
1826 is defined in the inner loop. */
1827 if (STMT_VINFO_RELEVANT_P (stmt_info)((stmt_info)->relevant != vect_unused_in_scope))
1828 {
1829 tree phi_op;
1830
1831 if (gimple_phi_num_args (phi) != 1)
1832 return opt_result::failure_at (phi, "unsupported phi");
1833
1834 phi_op = PHI_ARG_DEF (phi, 0)gimple_phi_arg_def ((phi), (0));
1835 stmt_vec_info op_def_info = loop_vinfo->lookup_def (phi_op);
1836 if (!op_def_info)
1837 return opt_result::failure_at (phi, "unsupported phi\n");
1838
1839 if (STMT_VINFO_RELEVANT (op_def_info)(op_def_info)->relevant != vect_used_in_outer
1840 && (STMT_VINFO_RELEVANT (op_def_info)(op_def_info)->relevant
1841 != vect_used_in_outer_by_reduction))
1842 return opt_result::failure_at (phi, "unsupported phi\n");
1843
1844 if ((STMT_VINFO_DEF_TYPE (stmt_info)(stmt_info)->def_type == vect_internal_def
1845 || (STMT_VINFO_DEF_TYPE (stmt_info)(stmt_info)->def_type
1846 == vect_double_reduction_def))
1847 && !vectorizable_lc_phi (loop_vinfo,
1848 stmt_info, NULLnullptr, NULLnullptr))
1849 return opt_result::failure_at (phi, "unsupported phi\n");
1850 }
1851
1852 continue;
1853 }
1854
1855 gcc_assert (stmt_info)((void)(!(stmt_info) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 1855, __FUNCTION__), 0 : 0))
;
1856
1857 if ((STMT_VINFO_RELEVANT (stmt_info)(stmt_info)->relevant == vect_used_in_scope
1858 || STMT_VINFO_LIVE_P (stmt_info)(stmt_info)->live)
1859 && STMT_VINFO_DEF_TYPE (stmt_info)(stmt_info)->def_type != vect_induction_def
1860 && STMT_VINFO_DEF_TYPE (stmt_info)(stmt_info)->def_type != vect_first_order_recurrence)
1861 /* A scalar-dependence cycle that we don't support. */
1862 return opt_result::failure_at (phi,
1863 "not vectorized:"
1864 " scalar dependence cycle.\n");
1865
1866 if (STMT_VINFO_RELEVANT_P (stmt_info)((stmt_info)->relevant != vect_unused_in_scope))
1867 {
1868 need_to_vectorize = true;
1869 if (STMT_VINFO_DEF_TYPE (stmt_info)(stmt_info)->def_type == vect_induction_def
1870 && ! PURE_SLP_STMT (stmt_info)((stmt_info)->slp_type == pure_slp))
1871 ok = vectorizable_induction (loop_vinfo,
1872 stmt_info, NULLnullptr, NULLnullptr,
1873 &cost_vec);
1874 else if ((STMT_VINFO_DEF_TYPE (stmt_info)(stmt_info)->def_type == vect_reduction_def
1875 || (STMT_VINFO_DEF_TYPE (stmt_info)(stmt_info)->def_type
1876 == vect_double_reduction_def)
1877 || STMT_VINFO_DEF_TYPE (stmt_info)(stmt_info)->def_type == vect_nested_cycle)
1878 && ! PURE_SLP_STMT (stmt_info)((stmt_info)->slp_type == pure_slp))
1879 ok = vectorizable_reduction (loop_vinfo,
1880 stmt_info, NULLnullptr, NULLnullptr, &cost_vec);
1881 else if ((STMT_VINFO_DEF_TYPE (stmt_info)(stmt_info)->def_type
1882 == vect_first_order_recurrence)
1883 && ! PURE_SLP_STMT (stmt_info)((stmt_info)->slp_type == pure_slp))
1884 ok = vectorizable_recurr (loop_vinfo, stmt_info, NULLnullptr, NULLnullptr,
1885 &cost_vec);
1886 }
1887
1888 /* SLP PHIs are tested by vect_slp_analyze_node_operations. */
1889 if (ok
1890 && STMT_VINFO_LIVE_P (stmt_info)(stmt_info)->live
1891 && !PURE_SLP_STMT (stmt_info)((stmt_info)->slp_type == pure_slp))
1892 ok = vectorizable_live_operation (loop_vinfo,
1893 stmt_info, NULLnullptr, NULLnullptr, NULLnullptr,
1894 -1, false, &cost_vec);
1895
1896 if (!ok)
1897 return opt_result::failure_at (phi,
1898 "not vectorized: relevant phi not "
1899 "supported: %G",
1900 static_cast <gimple *> (phi));
1901 }
1902
1903 for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si);
1904 gsi_next (&si))
1905 {
1906 gimple *stmt = gsi_stmt (si);
1907 if (!gimple_clobber_p (stmt)
1908 && !is_gimple_debug (stmt))
1909 {
1910 opt_result res
1911 = vect_analyze_stmt (loop_vinfo,
1912 loop_vinfo->lookup_stmt (stmt),
1913 &need_to_vectorize,
1914 NULLnullptr, NULLnullptr, &cost_vec);
1915 if (!res)
1916 return res;
1917 }
1918 }
1919 } /* bbs */
1920
1921 add_stmt_costs (loop_vinfo->vector_costs, &cost_vec);
1922
1923 /* All operations in the loop are either irrelevant (deal with loop
1924 control, or dead), or only used outside the loop and can be moved
1925 out of the loop (e.g. invariants, inductions). The loop can be
1926 optimized away by scalar optimizations. We're better off not
1927 touching this loop. */
1928 if (!need_to_vectorize)
1929 {
1930 if (dump_enabled_p ())
1931 dump_printf_loc (MSG_NOTE, vect_location,
1932 "All the computation can be taken out of the loop.\n");
1933 return opt_result::failure_at
1934 (vect_location,
1935 "not vectorized: redundant loop. no profit to vectorize.\n");
1936 }
1937
1938 return opt_result::success ();
1939}
1940
1941/* Return true if we know that the iteration count is smaller than the
1942 vectorization factor. Return false if it isn't, or if we can't be sure
1943 either way. */
1944
1945static bool
1946vect_known_niters_smaller_than_vf (loop_vec_info loop_vinfo)
1947{
1948 unsigned int assumed_vf = vect_vf_for_cost (loop_vinfo);
1949
1950 HOST_WIDE_INTlong max_niter;
1951 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)(tree_fits_shwi_p ((loop_vinfo)->num_iters) && tree_to_shwi
((loop_vinfo)->num_iters) > 0)
)
1952 max_niter = LOOP_VINFO_INT_NITERS (loop_vinfo)(((unsigned long) (*tree_int_cst_elt_check (((loop_vinfo)->
num_iters), (0), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 1952, __FUNCTION__))))
;
1953 else
1954 max_niter = max_stmt_executions_int (LOOP_VINFO_LOOP (loop_vinfo)(loop_vinfo)->loop);
1955
1956 if (max_niter != -1 && (unsigned HOST_WIDE_INTlong) max_niter < assumed_vf)
1957 return true;
1958
1959 return false;
1960}
1961
1962/* Analyze the cost of the loop described by LOOP_VINFO. Decide if it
1963 is worthwhile to vectorize. Return 1 if definitely yes, 0 if
1964 definitely no, or -1 if it's worth retrying. */
1965
1966static int
1967vect_analyze_loop_costing (loop_vec_info loop_vinfo,
1968 unsigned *suggested_unroll_factor)
1969{
1970 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo)(loop_vinfo)->loop;
1971 unsigned int assumed_vf = vect_vf_for_cost (loop_vinfo);
1972
1973 /* Only loops that can handle partially-populated vectors can have iteration
1974 counts less than the vectorization factor. */
1975 if (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)(loop_vinfo)->using_partial_vectors_p)
1976 {
1977 if (vect_known_niters_smaller_than_vf (loop_vinfo))
1978 {
1979 if (dump_enabled_p ())
1980 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1981 "not vectorized: iteration count smaller than "
1982 "vectorization factor.\n");
1983 return 0;
1984 }
1985 }
1986
1987 /* If using the "very cheap" model. reject cases in which we'd keep
1988 a copy of the scalar code (even if we might be able to vectorize it). */
1989 if (loop_cost_model (loop) == VECT_COST_MODEL_VERY_CHEAP
1990 && (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)(loop_vinfo)->peeling_for_alignment
1991 || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)(loop_vinfo)->peeling_for_gaps
1992 || LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo)(loop_vinfo)->peeling_for_niter))
1993 {
1994 if (dump_enabled_p ())
1995 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1996 "some scalar iterations would need to be peeled\n");
1997 return 0;
1998 }
1999
2000 int min_profitable_iters, min_profitable_estimate;
2001 vect_estimate_min_profitable_iters (loop_vinfo, &min_profitable_iters,
2002 &min_profitable_estimate,
2003 suggested_unroll_factor);
2004
2005 if (min_profitable_iters < 0)
2006 {
2007 if (dump_enabled_p ())
2008 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2009 "not vectorized: vectorization not profitable.\n");
2010 if (dump_enabled_p ())
2011 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2012 "not vectorized: vector version will never be "
2013 "profitable.\n");
2014 return -1;
2015 }
2016
2017 int min_scalar_loop_bound = (param_min_vect_loop_boundglobal_options.x_param_min_vect_loop_bound
2018 * assumed_vf);
2019
2020 /* Use the cost model only if it is more conservative than user specified
2021 threshold. */
2022 unsigned int th = (unsigned) MAX (min_scalar_loop_bound,((min_scalar_loop_bound) > (min_profitable_iters) ? (min_scalar_loop_bound
) : (min_profitable_iters))
2023 min_profitable_iters)((min_scalar_loop_bound) > (min_profitable_iters) ? (min_scalar_loop_bound
) : (min_profitable_iters))
;
2024
2025 LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo)(loop_vinfo)->th = th;
2026
2027 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)(tree_fits_shwi_p ((loop_vinfo)->num_iters) && tree_to_shwi
((loop_vinfo)->num_iters) > 0)
2028 && LOOP_VINFO_INT_NITERS (loop_vinfo)(((unsigned long) (*tree_int_cst_elt_check (((loop_vinfo)->
num_iters), (0), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 2028, __FUNCTION__))))
< th)
2029 {
2030 if (dump_enabled_p ())
2031 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2032 "not vectorized: vectorization not profitable.\n");
2033 if (dump_enabled_p ())
2034 dump_printf_loc (MSG_NOTE, vect_location,
2035 "not vectorized: iteration count smaller than user "
2036 "specified loop bound parameter or minimum profitable "
2037 "iterations (whichever is more conservative).\n");
2038 return 0;
2039 }
2040
2041 /* The static profitablity threshold min_profitable_estimate includes
2042 the cost of having to check at runtime whether the scalar loop
2043 should be used instead. If it turns out that we don't need or want
2044 such a check, the threshold we should use for the static estimate
2045 is simply the point at which the vector loop becomes more profitable
2046 than the scalar loop. */
2047 if (min_profitable_estimate > min_profitable_iters
2048 && !LOOP_REQUIRES_VERSIONING (loop_vinfo)(((loop_vinfo)->may_misalign_stmts.length () > 0) || ((
loop_vinfo)->comp_alias_ddrs.length () > 0 || (loop_vinfo
)->check_unequal_addrs.length () > 0 || (loop_vinfo)->
lower_bounds.length () > 0) || ((loop_vinfo)->num_iters_assumptions
) || ((loop_vinfo)->simd_if_cond))
2049 && !LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo)(loop_vinfo)->peeling_for_niter
2050 && !LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)(loop_vinfo)->peeling_for_alignment
2051 && !vect_apply_runtime_profitability_check_p (loop_vinfo))
2052 {
2053 if (dump_enabled_p ())
2054 dump_printf_loc (MSG_NOTE, vect_location, "no need for a runtime"
2055 " choice between the scalar and vector loops\n");
2056 min_profitable_estimate = min_profitable_iters;
2057 }
2058
2059 /* If the vector loop needs multiple iterations to be beneficial then
2060 things are probably too close to call, and the conservative thing
2061 would be to stick with the scalar code. */
2062 if (loop_cost_model (loop) == VECT_COST_MODEL_VERY_CHEAP
2063 && min_profitable_estimate > (int) vect_vf_for_cost (loop_vinfo))
2064 {
2065 if (dump_enabled_p ())
2066 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2067 "one iteration of the vector loop would be"
2068 " more expensive than the equivalent number of"
2069 " iterations of the scalar loop\n");
2070 return 0;
2071 }
2072
2073 HOST_WIDE_INTlong estimated_niter;
2074
2075 /* If we are vectorizing an epilogue then we know the maximum number of
2076 scalar iterations it will cover is at least one lower than the
2077 vectorization factor of the main loop. */
2078 if (LOOP_VINFO_EPILOGUE_P (loop_vinfo)((loop_vinfo)->orig_loop_info != nullptr))
2079 estimated_niter
2080 = vect_vf_for_cost (LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo)(loop_vinfo)->orig_loop_info) - 1;
2081 else
2082 {
2083 estimated_niter = estimated_stmt_executions_int (loop);
2084 if (estimated_niter == -1)
2085 estimated_niter = likely_max_stmt_executions_int (loop);
2086 }
2087 if (estimated_niter != -1
2088 && ((unsigned HOST_WIDE_INTlong) estimated_niter
2089 < MAX (th, (unsigned) min_profitable_estimate)((th) > ((unsigned) min_profitable_estimate) ? (th) : ((unsigned
) min_profitable_estimate))
))
2090 {
2091 if (dump_enabled_p ())
2092 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2093 "not vectorized: estimated iteration count too "
2094 "small.\n");
2095 if (dump_enabled_p ())
2096 dump_printf_loc (MSG_NOTE, vect_location,
2097 "not vectorized: estimated iteration count smaller "
2098 "than specified loop bound parameter or minimum "
2099 "profitable iterations (whichever is more "
2100 "conservative).\n");
2101 return -1;
2102 }
2103
2104 return 1;
2105}
2106
2107static opt_result
2108vect_get_datarefs_in_loop (loop_p loop, basic_block *bbs,
2109 vec<data_reference_p> *datarefs,
2110 unsigned int *n_stmts)
2111{
2112 *n_stmts = 0;
2113 for (unsigned i = 0; i < loop->num_nodes; i++)
2114 for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]);
2115 !gsi_end_p (gsi); gsi_next (&gsi))
2116 {
2117 gimple *stmt = gsi_stmt (gsi);
2118 if (is_gimple_debug (stmt))
2119 continue;
2120 ++(*n_stmts);
2121 opt_result res = vect_find_stmt_data_reference (loop, stmt, datarefs,
2122 NULLnullptr, 0);
2123 if (!res)
2124 {
2125 if (is_gimple_call (stmt) && loop->safelen)
2126 {
2127 tree fndecl = gimple_call_fndecl (stmt), op;
2128 if (fndecl == NULL_TREE(tree) nullptr
2129 && gimple_call_internal_p (stmt, IFN_MASK_CALL))
2130 {
2131 fndecl = gimple_call_arg (stmt, 0);
2132 gcc_checking_assert (TREE_CODE (fndecl) == ADDR_EXPR)((void)(!(((enum tree_code) (fndecl)->base.code) == ADDR_EXPR
) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 2132, __FUNCTION__), 0 : 0))
;
2133 fndecl = TREE_OPERAND (fndecl, 0)(*((const_cast<tree*> (tree_operand_check ((fndecl), (0
), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 2133, __FUNCTION__)))))
;
2134 gcc_checking_assert (TREE_CODE (fndecl) == FUNCTION_DECL)((void)(!(((enum tree_code) (fndecl)->base.code) == FUNCTION_DECL
) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 2134, __FUNCTION__), 0 : 0))
;
2135 }
2136 if (fndecl != NULL_TREE(tree) nullptr)
2137 {
2138 cgraph_node *node = cgraph_node::get (fndecl);
2139 if (node != NULLnullptr && node->simd_clones != NULLnullptr)
2140 {
2141 unsigned int j, n = gimple_call_num_args (stmt);
2142 for (j = 0; j < n; j++)
2143 {
2144 op = gimple_call_arg (stmt, j);
2145 if (DECL_P (op)(tree_code_type_tmpl <0>::tree_code_type[(int) (((enum tree_code
) (op)->base.code))] == tcc_declaration)
2146 || (REFERENCE_CLASS_P (op)(tree_code_type_tmpl <0>::tree_code_type[(int) (((enum tree_code
) (op)->base.code))] == tcc_reference)
2147 && get_base_address (op)))
2148 break;
2149 }
2150 op = gimple_call_lhs (stmt);
2151 /* Ignore #pragma omp declare simd functions
2152 if they don't have data references in the
2153 call stmt itself. */
2154 if (j == n
2155 && !(op
2156 && (DECL_P (op)(tree_code_type_tmpl <0>::tree_code_type[(int) (((enum tree_code
) (op)->base.code))] == tcc_declaration)
2157 || (REFERENCE_CLASS_P (op)(tree_code_type_tmpl <0>::tree_code_type[(int) (((enum tree_code
) (op)->base.code))] == tcc_reference)
2158 && get_base_address (op)))))
2159 continue;
2160 }
2161 }
2162 }
2163 return res;
2164 }
2165 /* If dependence analysis will give up due to the limit on the
2166 number of datarefs stop here and fail fatally. */
2167 if (datarefs->length ()
2168 > (unsigned)param_loop_max_datarefs_for_datadepsglobal_options.x_param_loop_max_datarefs_for_datadeps)
2169 return opt_result::failure_at (stmt, "exceeded param "
2170 "loop-max-datarefs-for-datadeps\n");
2171 }
2172 return opt_result::success ();
2173}
2174
2175/* Look for SLP-only access groups and turn each individual access into its own
2176 group. */
2177static void
2178vect_dissolve_slp_only_groups (loop_vec_info loop_vinfo)
2179{
2180 unsigned int i;
2181 struct data_reference *dr;
2182
2183 DUMP_VECT_SCOPE ("vect_dissolve_slp_only_groups")auto_dump_scope scope ("vect_dissolve_slp_only_groups", vect_location
)
;
2184
2185 vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo)(loop_vinfo)->shared->datarefs;
2186 FOR_EACH_VEC_ELT (datarefs, i, dr)for (i = 0; (datarefs).iterate ((i), &(dr)); ++(i))
2187 {
2188 gcc_assert (DR_REF (dr))((void)(!((dr)->ref) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 2188, __FUNCTION__), 0 : 0))
;
2189 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (DR_STMT (dr)(dr)->stmt);
2190
2191 /* Check if the load is a part of an interleaving chain. */
2192 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)((stmt_info)->dr_aux.dr && (((void)(!((stmt_info)->
dr_aux.dr) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 2192, __FUNCTION__), 0 : 0)), (stmt_info)->first_element
))
)
2193 {
2194 stmt_vec_info first_element = DR_GROUP_FIRST_ELEMENT (stmt_info)(((void)(!((stmt_info)->dr_aux.dr) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 2194, __FUNCTION__), 0 : 0)), (stmt_info)->first_element
)
;
2195 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (first_element)(((void)(!((first_element)->dr_aux.stmt == (first_element)
) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 2195, __FUNCTION__), 0 : 0)), &(first_element)->dr_aux
)
;
2196 unsigned int group_size = DR_GROUP_SIZE (first_element)(((void)(!((first_element)->dr_aux.dr) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 2196, __FUNCTION__), 0 : 0)), (first_element)->size)
;
2197
2198 /* Check if SLP-only groups. */
2199 if (!STMT_SLP_TYPE (stmt_info)(stmt_info)->slp_type
2200 && STMT_VINFO_SLP_VECT_ONLY (first_element)(first_element)->slp_vect_only_p)
2201 {
2202 /* Dissolve the group. */
2203 STMT_VINFO_SLP_VECT_ONLY (first_element)(first_element)->slp_vect_only_p = false;
2204
2205 stmt_vec_info vinfo = first_element;
2206 while (vinfo)
2207 {
2208 stmt_vec_info next = DR_GROUP_NEXT_ELEMENT (vinfo)(((void)(!((vinfo)->dr_aux.dr) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 2208, __FUNCTION__), 0 : 0)), (vinfo)->next_element)
;
2209 DR_GROUP_FIRST_ELEMENT (vinfo)(((void)(!((vinfo)->dr_aux.dr) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 2209, __FUNCTION__), 0 : 0)), (vinfo)->first_element)
= vinfo;
2210 DR_GROUP_NEXT_ELEMENT (vinfo)(((void)(!((vinfo)->dr_aux.dr) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 2210, __FUNCTION__), 0 : 0)), (vinfo)->next_element)
= NULLnullptr;
2211 DR_GROUP_SIZE (vinfo)(((void)(!((vinfo)->dr_aux.dr) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 2211, __FUNCTION__), 0 : 0)), (vinfo)->size)
= 1;
2212 if (STMT_VINFO_STRIDED_P (first_element)(first_element)->strided_p)
2213 DR_GROUP_GAP (vinfo)(((void)(!((vinfo)->dr_aux.dr) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 2213, __FUNCTION__), 0 : 0)), (vinfo)->gap)
= 0;
2214 else
2215 DR_GROUP_GAP (vinfo)(((void)(!((vinfo)->dr_aux.dr) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 2215, __FUNCTION__), 0 : 0)), (vinfo)->gap)
= group_size - 1;
2216 /* Duplicate and adjust alignment info, it needs to
2217 be present on each group leader, see dr_misalignment. */
2218 if (vinfo != first_element)
2219 {
2220 dr_vec_info *dr_info2 = STMT_VINFO_DR_INFO (vinfo)(((void)(!((vinfo)->dr_aux.stmt == (vinfo)) ? fancy_abort (
"/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 2220, __FUNCTION__), 0 : 0)), &(vinfo)->dr_aux)
;
2221 dr_info2->target_alignment = dr_info->target_alignment;
2222 int misalignment = dr_info->misalignment;
2223 if (misalignment != DR_MISALIGNMENT_UNKNOWN(-1))
2224 {
2225 HOST_WIDE_INTlong diff
2226 = (TREE_INT_CST_LOW (DR_INIT (dr_info2->dr))((unsigned long) (*tree_int_cst_elt_check (((dr_info2->dr)
->innermost.init), (0), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 2226, __FUNCTION__)))
2227 - TREE_INT_CST_LOW (DR_INIT (dr_info->dr))((unsigned long) (*tree_int_cst_elt_check (((dr_info->dr)->
innermost.init), (0), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 2227, __FUNCTION__)))
);
2228 unsigned HOST_WIDE_INTlong align_c
2229 = dr_info->target_alignment.to_constant ();
2230 misalignment = (misalignment + diff) % align_c;
2231 }
2232 dr_info2->misalignment = misalignment;
2233 }
2234 vinfo = next;
2235 }
2236 }
2237 }
2238 }
2239}
2240
2241/* Determine if operating on full vectors for LOOP_VINFO might leave
2242 some scalar iterations still to do. If so, decide how we should
2243 handle those scalar iterations. The possibilities are:
2244
2245 (1) Make LOOP_VINFO operate on partial vectors instead of full vectors.
2246 In this case:
2247
2248 LOOP_VINFO_USING_PARTIAL_VECTORS_P == true
2249 LOOP_VINFO_EPIL_USING_PARTIAL_VECTORS_P == false
2250 LOOP_VINFO_PEELING_FOR_NITER == false
2251
2252 (2) Make LOOP_VINFO operate on full vectors and use an epilogue loop
2253 to handle the remaining scalar iterations. In this case:
2254
2255 LOOP_VINFO_USING_PARTIAL_VECTORS_P == false
2256 LOOP_VINFO_PEELING_FOR_NITER == true
2257
2258 There are two choices:
2259
2260 (2a) Consider vectorizing the epilogue loop at the same VF as the
2261 main loop, but using partial vectors instead of full vectors.
2262 In this case:
2263
2264 LOOP_VINFO_EPIL_USING_PARTIAL_VECTORS_P == true
2265
2266 (2b) Consider vectorizing the epilogue loop at lower VFs only.
2267 In this case:
2268
2269 LOOP_VINFO_EPIL_USING_PARTIAL_VECTORS_P == false
2270
2271 When FOR_EPILOGUE_P is true, make this determination based on the
2272 assumption that LOOP_VINFO is an epilogue loop, otherwise make it
2273 based on the assumption that LOOP_VINFO is the main loop. The caller
2274 has made sure that the number of iterations is set appropriately for
2275 this value of FOR_EPILOGUE_P. */
2276
2277opt_result
2278vect_determine_partial_vectors_and_peeling (loop_vec_info loop_vinfo,
2279 bool for_epilogue_p)
2280{
2281 /* Determine whether there would be any scalar iterations left over. */
2282 bool need_peeling_or_partial_vectors_p
2283 = vect_need_peeling_or_partial_vectors_p (loop_vinfo);
2284
2285 /* Decide whether to vectorize the loop with partial vectors. */
2286 LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)(loop_vinfo)->using_partial_vectors_p = false;
2287 LOOP_VINFO_EPIL_USING_PARTIAL_VECTORS_P (loop_vinfo)(loop_vinfo)->epil_using_partial_vectors_p = false;
2288 if (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)(loop_vinfo)->can_use_partial_vectors_p
2289 && need_peeling_or_partial_vectors_p)
2290 {
2291 /* For partial-vector-usage=1, try to push the handling of partial
2292 vectors to the epilogue, with the main loop continuing to operate
2293 on full vectors.
2294
2295 If we are unrolling we also do not want to use partial vectors. This
2296 is to avoid the overhead of generating multiple masks and also to
2297 avoid having to execute entire iterations of FALSE masked instructions
2298 when dealing with one or less full iterations.
2299
2300 ??? We could then end up failing to use partial vectors if we
2301 decide to peel iterations into a prologue, and if the main loop
2302 then ends up processing fewer than VF iterations. */
2303 if ((param_vect_partial_vector_usageglobal_options.x_param_vect_partial_vector_usage == 1
2304 || loop_vinfo->suggested_unroll_factor > 1)
2305 && !LOOP_VINFO_EPILOGUE_P (loop_vinfo)((loop_vinfo)->orig_loop_info != nullptr)
2306 && !vect_known_niters_smaller_than_vf (loop_vinfo))
2307 LOOP_VINFO_EPIL_USING_PARTIAL_VECTORS_P (loop_vinfo)(loop_vinfo)->epil_using_partial_vectors_p = true;
2308 else
2309 LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)(loop_vinfo)->using_partial_vectors_p = true;
2310 }
2311
2312 if (dump_enabled_p ())
2313 {
2314 if (LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)(loop_vinfo)->using_partial_vectors_p)
2315 dump_printf_loc (MSG_NOTE, vect_location,
2316 "operating on partial vectors%s.\n",
2317 for_epilogue_p ? " for epilogue loop" : "");
2318 else
2319 dump_printf_loc (MSG_NOTE, vect_location,
2320 "operating only on full vectors%s.\n",
2321 for_epilogue_p ? " for epilogue loop" : "");
2322 }
2323
2324 if (for_epilogue_p)
2325 {
2326 loop_vec_info orig_loop_vinfo = LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo)(loop_vinfo)->orig_loop_info;
2327 gcc_assert (orig_loop_vinfo)((void)(!(orig_loop_vinfo) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 2327, __FUNCTION__), 0 : 0))
;
2328 if (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)(loop_vinfo)->using_partial_vectors_p)
2329 gcc_assert (known_lt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),((void)(!((!maybe_le ((orig_loop_vinfo)->vectorization_factor
, (loop_vinfo)->vectorization_factor))) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 2330, __FUNCTION__), 0 : 0))
2330 LOOP_VINFO_VECT_FACTOR (orig_loop_vinfo)))((void)(!((!maybe_le ((orig_loop_vinfo)->vectorization_factor
, (loop_vinfo)->vectorization_factor))) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 2330, __FUNCTION__), 0 : 0))
;
2331 }
2332
2333 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)(tree_fits_shwi_p ((loop_vinfo)->num_iters) && tree_to_shwi
((loop_vinfo)->num_iters) > 0)
2334 && !LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)(loop_vinfo)->using_partial_vectors_p)
2335 {
2336 /* Check that the loop processes at least one full vector. */
2337 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo)(loop_vinfo)->vectorization_factor;
2338 tree scalar_niters = LOOP_VINFO_NITERS (loop_vinfo)(loop_vinfo)->num_iters;
2339 if (known_lt (wi::to_widest (scalar_niters), vf)(!maybe_le (vf, wi::to_widest (scalar_niters))))
2340 return opt_result::failure_at (vect_location,
2341 "loop does not have enough iterations"
2342 " to support vectorization.\n");
2343
2344 /* If we need to peel an extra epilogue iteration to handle data
2345 accesses with gaps, check that there are enough scalar iterations
2346 available.
2347
2348 The check above is redundant with this one when peeling for gaps,
2349 but the distinction is useful for diagnostics. */
2350 tree scalar_nitersm1 = LOOP_VINFO_NITERSM1 (loop_vinfo)(loop_vinfo)->num_itersm1;
2351 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)(loop_vinfo)->peeling_for_gaps
2352 && known_lt (wi::to_widest (scalar_nitersm1), vf)(!maybe_le (vf, wi::to_widest (scalar_nitersm1))))
2353 return opt_result::failure_at (vect_location,
2354 "loop does not have enough iterations"
2355 " to support peeling for gaps.\n");
2356 }
2357
2358 LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo)(loop_vinfo)->peeling_for_niter
2359 = (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)(loop_vinfo)->using_partial_vectors_p
2360 && need_peeling_or_partial_vectors_p);
2361
2362 return opt_result::success ();
2363}
2364
2365/* Function vect_analyze_loop_2.
2366
2367 Apply a set of analyses on LOOP specified by LOOP_VINFO, the different
2368 analyses will record information in some members of LOOP_VINFO. FATAL
2369 indicates if some analysis meets fatal error. If one non-NULL pointer
2370 SUGGESTED_UNROLL_FACTOR is provided, it's intent to be filled with one
2371 worked out suggested unroll factor, while one NULL pointer shows it's
2372 going to apply the suggested unroll factor. SLP_DONE_FOR_SUGGESTED_UF
2373 is to hold the slp decision when the suggested unroll factor is worked
2374 out. */
2375static opt_result
2376vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool &fatal,
2377 unsigned *suggested_unroll_factor,
2378 bool& slp_done_for_suggested_uf)
2379{
2380 opt_result ok = opt_result::success ();
2381 int res;
2382 unsigned int max_vf = MAX_VECTORIZATION_FACTOR2147483647;
2383 poly_uint64 min_vf = 2;
2384 loop_vec_info orig_loop_vinfo = NULLnullptr;
2385
2386 /* If we are dealing with an epilogue then orig_loop_vinfo points to the
2387 loop_vec_info of the first vectorized loop. */
2388 if (LOOP_VINFO_EPILOGUE_P (loop_vinfo)((loop_vinfo)->orig_loop_info != nullptr))
2389 orig_loop_vinfo = LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo)(loop_vinfo)->orig_loop_info;
2390 else
2391 orig_loop_vinfo = loop_vinfo;
2392 gcc_assert (orig_loop_vinfo)((void)(!(orig_loop_vinfo) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 2392, __FUNCTION__), 0 : 0))
;
2393
2394 /* The first group of checks is independent of the vector size. */
2395 fatal = true;
2396
2397 if (LOOP_VINFO_SIMD_IF_COND (loop_vinfo)(loop_vinfo)->simd_if_cond
2398 && integer_zerop (LOOP_VINFO_SIMD_IF_COND (loop_vinfo)(loop_vinfo)->simd_if_cond))
2399 return opt_result::failure_at (vect_location,
2400 "not vectorized: simd if(0)\n");
2401
2402 /* Find all data references in the loop (which correspond to vdefs/vuses)
2403 and analyze their evolution in the loop. */
2404
2405 loop_p loop = LOOP_VINFO_LOOP (loop_vinfo)(loop_vinfo)->loop;
2406
2407 /* Gather the data references and count stmts in the loop. */
2408 if (!LOOP_VINFO_DATAREFS (loop_vinfo)(loop_vinfo)->shared->datarefs.exists ())
2409 {
2410 opt_result res
2411 = vect_get_datarefs_in_loop (loop, LOOP_VINFO_BBS (loop_vinfo)(loop_vinfo)->bbs,
2412 &LOOP_VINFO_DATAREFS (loop_vinfo)(loop_vinfo)->shared->datarefs,
2413 &LOOP_VINFO_N_STMTS (loop_vinfo)(loop_vinfo)->shared->n_stmts);
2414 if (!res)
2415 {
2416 if (dump_enabled_p ())
2417 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2418 "not vectorized: loop contains function "
2419 "calls or data references that cannot "
2420 "be analyzed\n");
2421 return res;
2422 }
2423 loop_vinfo->shared->save_datarefs ();
2424 }
2425 else
2426 loop_vinfo->shared->check_datarefs ();
2427
2428 /* Analyze the data references and also adjust the minimal
2429 vectorization factor according to the loads and stores. */
2430
2431 ok = vect_analyze_data_refs (loop_vinfo, &min_vf, &fatal);
2432 if (!ok)
2433 {
2434 if (dump_enabled_p ())
2435 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2436 "bad data references.\n");
2437 return ok;
2438 }
2439
2440 /* Check if we are applying unroll factor now. */
2441 bool applying_suggested_uf = loop_vinfo->suggested_unroll_factor > 1;
2442 gcc_assert (!applying_suggested_uf || !suggested_unroll_factor)((void)(!(!applying_suggested_uf || !suggested_unroll_factor)
? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 2442, __FUNCTION__), 0 : 0))
;
2443
2444 /* If the slp decision is false when suggested unroll factor is worked
2445 out, and we are applying suggested unroll factor, we can simply skip
2446 all slp related analyses this time. */
2447 bool slp = !applying_suggested_uf || slp_done_for_suggested_uf;
2448
2449 /* Classify all cross-iteration scalar data-flow cycles.
2450 Cross-iteration cycles caused by virtual phis are analyzed separately. */
2451 vect_analyze_scalar_cycles (loop_vinfo, slp);
2452
2453 vect_pattern_recog (loop_vinfo);
2454
2455 vect_fixup_scalar_cycles_with_patterns (loop_vinfo);
2456
2457 /* Analyze the access patterns of the data-refs in the loop (consecutive,
2458 complex, etc.). FORNOW: Only handle consecutive access pattern. */
2459
2460 ok = vect_analyze_data_ref_accesses (loop_vinfo, NULLnullptr);
2461 if (!ok)
2462 {
2463 if (dump_enabled_p ())
2464 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2465 "bad data access.\n");
2466 return ok;
2467 }
2468
2469 /* Data-flow analysis to detect stmts that do not need to be vectorized. */
2470
2471 ok = vect_mark_stmts_to_be_vectorized (loop_vinfo, &fatal);
2472 if (!ok)
2473 {
2474 if (dump_enabled_p ())
2475 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2476 "unexpected pattern.\n");
2477 return ok;
2478 }
2479
2480 /* While the rest of the analysis below depends on it in some way. */
2481 fatal = false;
2482
2483 /* Analyze data dependences between the data-refs in the loop
2484 and adjust the maximum vectorization factor according to
2485 the dependences.
2486 FORNOW: fail at the first data dependence that we encounter. */
2487
2488 ok = vect_analyze_data_ref_dependences (loop_vinfo, &max_vf);
2489 if (!ok)
2490 {
2491 if (dump_enabled_p ())
2492 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2493 "bad data dependence.\n");
2494 return ok;
2495 }
2496 if (max_vf != MAX_VECTORIZATION_FACTOR2147483647
2497 && maybe_lt (max_vf, min_vf))
2498 return opt_result::failure_at (vect_location, "bad data dependence.\n");
2499 LOOP_VINFO_MAX_VECT_FACTOR (loop_vinfo)(loop_vinfo)->max_vectorization_factor = max_vf;
2500
2501 ok = vect_determine_vectorization_factor (loop_vinfo);
2502 if (!ok)
2503 {
2504 if (dump_enabled_p ())
2505 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2506 "can't determine vectorization factor.\n");
2507 return ok;
2508 }
2509 if (max_vf != MAX_VECTORIZATION_FACTOR2147483647
2510 && maybe_lt (max_vf, LOOP_VINFO_VECT_FACTOR (loop_vinfo)(loop_vinfo)->vectorization_factor))
2511 return opt_result::failure_at (vect_location, "bad data dependence.\n");
2512
2513 /* Compute the scalar iteration cost. */
2514 vect_compute_single_scalar_iteration_cost (loop_vinfo);
2515
2516 poly_uint64 saved_vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo)(loop_vinfo)->vectorization_factor;
2517
2518 if (slp)
2519 {
2520 /* Check the SLP opportunities in the loop, analyze and build
2521 SLP trees. */
2522 ok = vect_analyze_slp (loop_vinfo, LOOP_VINFO_N_STMTS (loop_vinfo)(loop_vinfo)->shared->n_stmts);
2523 if (!ok)
2524 return ok;
2525
2526 /* If there are any SLP instances mark them as pure_slp. */
2527 slp = vect_make_slp_decision (loop_vinfo);
2528 if (slp)
2529 {
2530 /* Find stmts that need to be both vectorized and SLPed. */
2531 vect_detect_hybrid_slp (loop_vinfo);
2532
2533 /* Update the vectorization factor based on the SLP decision. */
2534 vect_update_vf_for_slp (loop_vinfo);
2535
2536 /* Optimize the SLP graph with the vectorization factor fixed. */
2537 vect_optimize_slp (loop_vinfo);
2538
2539 /* Gather the loads reachable from the SLP graph entries. */
2540 vect_gather_slp_loads (loop_vinfo);
2541 }
2542 }
2543
2544 bool saved_can_use_partial_vectors_p
2545 = LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)(loop_vinfo)->can_use_partial_vectors_p;
2546
2547 /* We don't expect to have to roll back to anything other than an empty
2548 set of rgroups. */
2549 gcc_assert (LOOP_VINFO_MASKS (loop_vinfo).is_empty ())((void)(!((loop_vinfo)->masks.is_empty ()) ? fancy_abort (
"/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 2549, __FUNCTION__), 0 : 0))
;
2550
2551 /* This is the point where we can re-start analysis with SLP forced off. */
2552start_over:
2553
2554 /* Apply the suggested unrolling factor, this was determined by the backend
2555 during finish_cost the first time we ran the analyzis for this
2556 vector mode. */
2557 if (applying_suggested_uf)
2558 LOOP_VINFO_VECT_FACTOR (loop_vinfo)(loop_vinfo)->vectorization_factor *= loop_vinfo->suggested_unroll_factor;
2559
2560 /* Now the vectorization factor is final. */
2561 poly_uint64 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo)(loop_vinfo)->vectorization_factor;
2562 gcc_assert (known_ne (vectorization_factor, 0U))((void)(!((!maybe_eq (vectorization_factor, 0U))) ? fancy_abort
("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 2562, __FUNCTION__), 0 : 0))
;
2563
2564 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)(tree_fits_shwi_p ((loop_vinfo)->num_iters) && tree_to_shwi
((loop_vinfo)->num_iters) > 0)
&& dump_enabled_p ())
2565 {
2566 dump_printf_loc (MSG_NOTE, vect_location,
2567 "vectorization_factor = ");
2568 dump_dec (MSG_NOTE, vectorization_factor);
2569 dump_printf (MSG_NOTE, ", niters = %wd\n",
2570 LOOP_VINFO_INT_NITERS (loop_vinfo)(((unsigned long) (*tree_int_cst_elt_check (((loop_vinfo)->
num_iters), (0), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 2570, __FUNCTION__))))
);
2571 }
2572
2573 loop_vinfo->vector_costs = init_cost (loop_vinfo, false);
2574
2575 /* Analyze the alignment of the data-refs in the loop.
2576 Fail if a data reference is found that cannot be vectorized. */
2577
2578 ok = vect_analyze_data_refs_alignment (loop_vinfo);
2579 if (!ok)
2580 {
2581 if (dump_enabled_p ())
2582 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2583 "bad data alignment.\n");
2584 return ok;
2585 }
2586
2587 /* Prune the list of ddrs to be tested at run-time by versioning for alias.
2588 It is important to call pruning after vect_analyze_data_ref_accesses,
2589 since we use grouping information gathered by interleaving analysis. */
2590 ok = vect_prune_runtime_alias_test_list (loop_vinfo);
2591 if (!ok)
2592 return ok;
2593
2594 /* Do not invoke vect_enhance_data_refs_alignment for epilogue
2595 vectorization, since we do not want to add extra peeling or
2596 add versioning for alignment. */
2597 if (!LOOP_VINFO_EPILOGUE_P (loop_vinfo)((loop_vinfo)->orig_loop_info != nullptr))
2598 /* This pass will decide on using loop versioning and/or loop peeling in
2599 order to enhance the alignment of data references in the loop. */
2600 ok = vect_enhance_data_refs_alignment (loop_vinfo);
2601 if (!ok)
2602 return ok;
2603
2604 if (slp)
2605 {
2606 /* Analyze operations in the SLP instances. Note this may
2607 remove unsupported SLP instances which makes the above
2608 SLP kind detection invalid. */
2609 unsigned old_size = LOOP_VINFO_SLP_INSTANCES (loop_vinfo)(loop_vinfo)->slp_instances.length ();
2610 vect_slp_analyze_operations (loop_vinfo);
2611 if (LOOP_VINFO_SLP_INSTANCES (loop_vinfo)(loop_vinfo)->slp_instances.length () != old_size)
2612 {
2613 ok = opt_result::failure_at (vect_location,
2614 "unsupported SLP instances\n");
2615 goto again;
2616 }
2617
2618 /* Check whether any load in ALL SLP instances is possibly permuted. */
2619 slp_tree load_node, slp_root;
2620 unsigned i, x;
2621 slp_instance instance;
2622 bool can_use_lanes = true;
2623 FOR_EACH_VEC_ELT (LOOP_VINFO_SLP_INSTANCES (loop_vinfo), x, instance)for (x = 0; ((loop_vinfo)->slp_instances).iterate ((x), &
(instance)); ++(x))
2624 {
2625 slp_root = SLP_INSTANCE_TREE (instance)(instance)->root;
2626 int group_size = SLP_TREE_LANES (slp_root)(slp_root)->lanes;
2627 tree vectype = SLP_TREE_VECTYPE (slp_root)(slp_root)->vectype;
2628 bool loads_permuted = false;
2629 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), i, load_node)for (i = 0; ((instance)->loads).iterate ((i), &(load_node
)); ++(i))
2630 {
2631 if (!SLP_TREE_LOAD_PERMUTATION (load_node)(load_node)->load_permutation.exists ())
2632 continue;
2633 unsigned j;
2634 stmt_vec_info load_info;
2635 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load_info)for (j = 0; ((load_node)->stmts).iterate ((j), &(load_info
)); ++(j))
2636 if (SLP_TREE_LOAD_PERMUTATION (load_node)(load_node)->load_permutation[j] != j)
2637 {
2638 loads_permuted = true;
2639 break;
2640 }
2641 }
2642
2643 /* If the loads and stores can be handled with load/store-lane
2644 instructions record it and move on to the next instance. */
2645 if (loads_permuted
2646 && SLP_INSTANCE_KIND (instance)(instance)->kind == slp_inst_kind_store
2647 && vect_store_lanes_supported (vectype, group_size, false))
2648 {
2649 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), i, load_node)for (i = 0; ((instance)->loads).iterate ((i), &(load_node
)); ++(i))
2650 {
2651 stmt_vec_info stmt_vinfo = DR_GROUP_FIRST_ELEMENT(((void)(!(((load_node)->stmts[0])->dr_aux.dr) ? fancy_abort
("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 2652, __FUNCTION__), 0 : 0)), ((load_node)->stmts[0])->
first_element)
2652 (SLP_TREE_SCALAR_STMTS (load_node)[0])(((void)(!(((load_node)->stmts[0])->dr_aux.dr) ? fancy_abort
("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 2652, __FUNCTION__), 0 : 0)), ((load_node)->stmts[0])->
first_element)
;
2653 /* Use SLP for strided accesses (or if we can't
2654 load-lanes). */
2655 if (STMT_VINFO_STRIDED_P (stmt_vinfo)(stmt_vinfo)->strided_p
2656 || ! vect_load_lanes_supported
2657 (STMT_VINFO_VECTYPE (stmt_vinfo)(stmt_vinfo)->vectype,
2658 DR_GROUP_SIZE (stmt_vinfo)(((void)(!((stmt_vinfo)->dr_aux.dr) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 2658, __FUNCTION__), 0 : 0)), (stmt_vinfo)->size)
, false))
2659 break;
2660 }
2661
2662 can_use_lanes
2663 = can_use_lanes && i == SLP_INSTANCE_LOADS (instance)(instance)->loads.length ();
2664
2665 if (can_use_lanes && dump_enabled_p ())
2666 dump_printf_loc (MSG_NOTE, vect_location,
2667 "SLP instance %p can use load/store-lanes\n",
2668 (void *) instance);
2669 }
2670 else
2671 {
2672 can_use_lanes = false;
2673 break;
2674 }
2675 }
2676
2677 /* If all SLP instances can use load/store-lanes abort SLP and try again
2678 with SLP disabled. */
2679 if (can_use_lanes)
2680 {
2681 ok = opt_result::failure_at (vect_location,
2682 "Built SLP cancelled: can use "
2683 "load/store-lanes\n");
2684 if (dump_enabled_p ())
2685 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2686 "Built SLP cancelled: all SLP instances support "
2687 "load/store-lanes\n");
2688 goto again;
2689 }
2690 }
2691
2692 /* Dissolve SLP-only groups. */
2693 vect_dissolve_slp_only_groups (loop_vinfo);
2694
2695 /* Scan all the remaining operations in the loop that are not subject
2696 to SLP and make sure they are vectorizable. */
2697 ok = vect_analyze_loop_operations (loop_vinfo);
2698 if (!ok)
2699 {
2700 if (dump_enabled_p ())
2701 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2702 "bad operation or unsupported loop bound.\n");
2703 return ok;
2704 }
2705
2706 /* For now, we don't expect to mix both masking and length approaches for one
2707 loop, disable it if both are recorded. */
2708 if (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)(loop_vinfo)->can_use_partial_vectors_p
2709 && !LOOP_VINFO_MASKS (loop_vinfo)(loop_vinfo)->masks.is_empty ()
2710 && !LOOP_VINFO_LENS (loop_vinfo)(loop_vinfo)->lens.is_empty ())
2711 {
2712 if (dump_enabled_p ())
2713 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2714 "can't vectorize a loop with partial vectors"
2715 " because we don't expect to mix different"
2716 " approaches with partial vectors for the"
2717 " same loop.\n");
2718 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)(loop_vinfo)->can_use_partial_vectors_p = false;
2719 }
2720
2721 /* If we still have the option of using partial vectors,
2722 check whether we can generate the necessary loop controls. */
2723 if (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)(loop_vinfo)->can_use_partial_vectors_p
2724 && !vect_verify_full_masking (loop_vinfo)
2725 && !vect_verify_loop_lens (loop_vinfo))
2726 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)(loop_vinfo)->can_use_partial_vectors_p = false;
2727
2728 /* If we're vectorizing an epilogue loop, the vectorized loop either needs
2729 to be able to handle fewer than VF scalars, or needs to have a lower VF
2730 than the main loop. */
2731 if (LOOP_VINFO_EPILOGUE_P (loop_vinfo)((loop_vinfo)->orig_loop_info != nullptr)
2732 && !LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)(loop_vinfo)->can_use_partial_vectors_p
2733 && maybe_ge (LOOP_VINFO_VECT_FACTOR (loop_vinfo),maybe_le ((orig_loop_vinfo)->vectorization_factor, (loop_vinfo
)->vectorization_factor)
2734 LOOP_VINFO_VECT_FACTOR (orig_loop_vinfo))maybe_le ((orig_loop_vinfo)->vectorization_factor, (loop_vinfo
)->vectorization_factor)
)
2735 return opt_result::failure_at (vect_location,
2736 "Vectorization factor too high for"
2737 " epilogue loop.\n");
2738
2739 /* Decide whether this loop_vinfo should use partial vectors or peeling,
2740 assuming that the loop will be used as a main loop. We will redo
2741 this analysis later if we instead decide to use the loop as an
2742 epilogue loop. */
2743 ok = vect_determine_partial_vectors_and_peeling (loop_vinfo, false);
2744 if (!ok)
2745 return ok;
2746
2747 /* Check the costings of the loop make vectorizing worthwhile. */
2748 res = vect_analyze_loop_costing (loop_vinfo, suggested_unroll_factor);
2749 if (res < 0)
2750 {
2751 ok = opt_result::failure_at (vect_location,
2752 "Loop costings may not be worthwhile.\n");
2753 goto again;
2754 }
2755 if (!res)
2756 return opt_result::failure_at (vect_location,
2757 "Loop costings not worthwhile.\n");
2758
2759 /* If an epilogue loop is required make sure we can create one. */
2760 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)(loop_vinfo)->peeling_for_gaps
2761 || LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo)(loop_vinfo)->peeling_for_niter)
2762 {
2763 if (dump_enabled_p ())
2764 dump_printf_loc (MSG_NOTE, vect_location, "epilog loop required\n");
2765 if (!vect_can_advance_ivs_p (loop_vinfo)
2766 || !slpeel_can_duplicate_loop_p (LOOP_VINFO_LOOP (loop_vinfo)(loop_vinfo)->loop,
2767 single_exit (LOOP_VINFO_LOOP(loop_vinfo)->loop
2768 (loop_vinfo)(loop_vinfo)->loop)))
2769 {
2770 ok = opt_result::failure_at (vect_location,
2771 "not vectorized: can't create required "
2772 "epilog loop\n");
2773 goto again;
2774 }
2775 }
2776
2777 /* During peeling, we need to check if number of loop iterations is
2778 enough for both peeled prolog loop and vector loop. This check
2779 can be merged along with threshold check of loop versioning, so
2780 increase threshold for this case if necessary.
2781
2782 If we are analyzing an epilogue we still want to check what its
2783 versioning threshold would be. If we decide to vectorize the epilogues we
2784 will want to use the lowest versioning threshold of all epilogues and main
2785 loop. This will enable us to enter a vectorized epilogue even when
2786 versioning the loop. We can't simply check whether the epilogue requires
2787 versioning though since we may have skipped some versioning checks when
2788 analyzing the epilogue. For instance, checks for alias versioning will be
2789 skipped when dealing with epilogues as we assume we already checked them
2790 for the main loop. So instead we always check the 'orig_loop_vinfo'. */
2791 if (LOOP_REQUIRES_VERSIONING (orig_loop_vinfo)(((orig_loop_vinfo)->may_misalign_stmts.length () > 0) ||
((orig_loop_vinfo)->comp_alias_ddrs.length () > 0 || (
orig_loop_vinfo)->check_unequal_addrs.length () > 0 || (
orig_loop_vinfo)->lower_bounds.length () > 0) || ((orig_loop_vinfo
)->num_iters_assumptions) || ((orig_loop_vinfo)->simd_if_cond
))
)
2792 {
2793 poly_uint64 niters_th = 0;
2794 unsigned int th = LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo)(loop_vinfo)->th;
2795
2796 if (!vect_use_loop_mask_for_alignment_p (loop_vinfo))
2797 {
2798 /* Niters for peeled prolog loop. */
2799 if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)(loop_vinfo)->peeling_for_alignment < 0)
2800 {
2801 dr_vec_info *dr_info = LOOP_VINFO_UNALIGNED_DR (loop_vinfo)(loop_vinfo)->unaligned_dr;
2802 tree vectype = STMT_VINFO_VECTYPE (dr_info->stmt)(dr_info->stmt)->vectype;
2803 niters_th += TYPE_VECTOR_SUBPARTS (vectype) - 1;
2804 }
2805 else
2806 niters_th += LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)(loop_vinfo)->peeling_for_alignment;
2807 }
2808
2809 /* Niters for at least one iteration of vectorized loop. */
2810 if (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)(loop_vinfo)->using_partial_vectors_p)
2811 niters_th += LOOP_VINFO_VECT_FACTOR (loop_vinfo)(loop_vinfo)->vectorization_factor;
2812 /* One additional iteration because of peeling for gap. */
2813 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)(loop_vinfo)->peeling_for_gaps)
2814 niters_th += 1;
2815
2816 /* Use the same condition as vect_transform_loop to decide when to use
2817 the cost to determine a versioning threshold. */
2818 if (vect_apply_runtime_profitability_check_p (loop_vinfo)
2819 && ordered_p (th, niters_th))
2820 niters_th = ordered_max (poly_uint64 (th), niters_th);
2821
2822 LOOP_VINFO_VERSIONING_THRESHOLD (loop_vinfo)(loop_vinfo)->versioning_threshold = niters_th;
2823 }
2824
2825 gcc_assert (known_eq (vectorization_factor,((void)(!((!maybe_ne (vectorization_factor, (loop_vinfo)->
vectorization_factor))) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 2826, __FUNCTION__), 0 : 0))
2826 LOOP_VINFO_VECT_FACTOR (loop_vinfo)))((void)(!((!maybe_ne (vectorization_factor, (loop_vinfo)->
vectorization_factor))) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 2826, __FUNCTION__), 0 : 0))
;
2827
2828 slp_done_for_suggested_uf = slp;
2829
2830 /* Ok to vectorize! */
2831 LOOP_VINFO_VECTORIZABLE_P (loop_vinfo)(loop_vinfo)->vectorizable = 1;
2832 return opt_result::success ();
2833
2834again:
2835 /* Ensure that "ok" is false (with an opt_problem if dumping is enabled). */
2836 gcc_assert (!ok)((void)(!(!ok) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 2836, __FUNCTION__), 0 : 0))
;
2837
2838 /* Try again with SLP forced off but if we didn't do any SLP there is
2839 no point in re-trying. */
2840 if (!slp)
2841 return ok;
2842
2843 /* If the slp decision is true when suggested unroll factor is worked
2844 out, and we are applying suggested unroll factor, we don't need to
2845 re-try any more. */
2846 if (applying_suggested_uf && slp_done_for_suggested_uf)
2847 return ok;
2848
2849 /* If there are reduction chains re-trying will fail anyway. */
2850 if (! LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo)(loop_vinfo)->reduction_chains.is_empty ())
2851 return ok;
2852
2853 /* Likewise if the grouped loads or stores in the SLP cannot be handled
2854 via interleaving or lane instructions. */
2855 slp_instance instance;
2856 slp_tree node;
2857 unsigned i, j;
2858 FOR_EACH_VEC_ELT (LOOP_VINFO_SLP_INSTANCES (loop_vinfo), i, instance)for (i = 0; ((loop_vinfo)->slp_instances).iterate ((i), &
(instance)); ++(i))
2859 {
2860 stmt_vec_info vinfo;
2861 vinfo = SLP_TREE_SCALAR_STMTS (SLP_INSTANCE_TREE (instance))((instance)->root)->stmts[0];
2862 if (! STMT_VINFO_GROUPED_ACCESS (vinfo)((vinfo)->dr_aux.dr && (((void)(!((vinfo)->dr_aux
.dr) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 2862, __FUNCTION__), 0 : 0)), (vinfo)->first_element))
)
2863 continue;
2864 vinfo = DR_GROUP_FIRST_ELEMENT (vinfo)(((void)(!((vinfo)->dr_aux.dr) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 2864, __FUNCTION__), 0 : 0)), (vinfo)->first_element)
;
2865 unsigned int size = DR_GROUP_SIZE (vinfo)(((void)(!((vinfo)->dr_aux.dr) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 2865, __FUNCTION__), 0 : 0)), (vinfo)->size)
;
2866 tree vectype = STMT_VINFO_VECTYPE (vinfo)(vinfo)->vectype;
2867 if (! vect_store_lanes_supported (vectype, size, false)
2868 && ! known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U)(!maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), 1U))
2869 && ! vect_grouped_store_supported (vectype, size))
2870 return opt_result::failure_at (vinfo->stmt,
2871 "unsupported grouped store\n");
2872 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), j, node)for (j = 0; ((instance)->loads).iterate ((j), &(node))
; ++(j))
2873 {
2874 vinfo = SLP_TREE_SCALAR_STMTS (node)(node)->stmts[0];
2875 vinfo = DR_GROUP_FIRST_ELEMENT (vinfo)(((void)(!((vinfo)->dr_aux.dr) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 2875, __FUNCTION__), 0 : 0)), (vinfo)->first_element)
;
2876 bool single_element_p = !DR_GROUP_NEXT_ELEMENT (vinfo)(((void)(!((vinfo)->dr_aux.dr) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 2876, __FUNCTION__), 0 : 0)), (vinfo)->next_element)
;
2877 size = DR_GROUP_SIZE (vinfo)(((void)(!((vinfo)->dr_aux.dr) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 2877, __FUNCTION__), 0 : 0)), (vinfo)->size)
;
2878 vectype = STMT_VINFO_VECTYPE (vinfo)(vinfo)->vectype;
2879 if (! vect_load_lanes_supported (vectype, size, false)
2880 && ! vect_grouped_load_supported (vectype, single_element_p,
2881 size))
2882 return opt_result::failure_at (vinfo->stmt,
2883 "unsupported grouped load\n");
2884 }
2885 }
2886
2887 if (dump_enabled_p ())
2888 dump_printf_loc (MSG_NOTE, vect_location,
2889 "re-trying with SLP disabled\n");
2890
2891 /* Roll back state appropriately. No SLP this time. */
2892 slp = false;
2893 /* Restore vectorization factor as it were without SLP. */
2894 LOOP_VINFO_VECT_FACTOR (loop_vinfo)(loop_vinfo)->vectorization_factor = saved_vectorization_factor;
2895 /* Free the SLP instances. */
2896 FOR_EACH_VEC_ELT (LOOP_VINFO_SLP_INSTANCES (loop_vinfo), j, instance)for (j = 0; ((loop_vinfo)->slp_instances).iterate ((j), &
(instance)); ++(j))
2897 vect_free_slp_instance (instance);
2898 LOOP_VINFO_SLP_INSTANCES (loop_vinfo)(loop_vinfo)->slp_instances.release ();
2899 /* Reset SLP type to loop_vect on all stmts. */
2900 for (i = 0; i < LOOP_VINFO_LOOP (loop_vinfo)(loop_vinfo)->loop->num_nodes; ++i)
2901 {
2902 basic_block bb = LOOP_VINFO_BBS (loop_vinfo)(loop_vinfo)->bbs[i];
2903 for (gimple_stmt_iterator si = gsi_start_phis (bb);
2904 !gsi_end_p (si); gsi_next (&si))
2905 {
2906 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
2907 STMT_SLP_TYPE (stmt_info)(stmt_info)->slp_type = loop_vect;
2908 if (STMT_VINFO_DEF_TYPE (stmt_info)(stmt_info)->def_type == vect_reduction_def
2909 || STMT_VINFO_DEF_TYPE (stmt_info)(stmt_info)->def_type == vect_double_reduction_def)
2910 {
2911 /* vectorizable_reduction adjusts reduction stmt def-types,
2912 restore them to that of the PHI. */
2913 STMT_VINFO_DEF_TYPE (STMT_VINFO_REDUC_DEF (stmt_info))((stmt_info)->reduc_def)->def_type
2914 = STMT_VINFO_DEF_TYPE (stmt_info)(stmt_info)->def_type;
2915 STMT_VINFO_DEF_TYPE (vect_stmt_to_vectorize(vect_stmt_to_vectorize ((stmt_info)->reduc_def))->def_type
2916 (STMT_VINFO_REDUC_DEF (stmt_info)))(vect_stmt_to_vectorize ((stmt_info)->reduc_def))->def_type
2917 = STMT_VINFO_DEF_TYPE (stmt_info)(stmt_info)->def_type;
2918 }
2919 }
2920 for (gimple_stmt_iterator si = gsi_start_bb (bb);
2921 !gsi_end_p (si); gsi_next (&si))
2922 {
2923 if (is_gimple_debug (gsi_stmt (si)))
2924 continue;
2925 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
2926 STMT_SLP_TYPE (stmt_info)(stmt_info)->slp_type = loop_vect;
2927 if (STMT_VINFO_IN_PATTERN_P (stmt_info)(stmt_info)->in_pattern_p)
2928 {
2929 stmt_vec_info pattern_stmt_info
2930 = STMT_VINFO_RELATED_STMT (stmt_info)(stmt_info)->related_stmt;
2931 if (STMT_VINFO_SLP_VECT_ONLY_PATTERN (pattern_stmt_info)(pattern_stmt_info)->slp_vect_pattern_only_p)
2932 STMT_VINFO_IN_PATTERN_P (stmt_info)(stmt_info)->in_pattern_p = false;
2933
2934 gimple *pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)(stmt_info)->pattern_def_seq;
2935 STMT_SLP_TYPE (pattern_stmt_info)(pattern_stmt_info)->slp_type = loop_vect;
2936 for (gimple_stmt_iterator pi = gsi_start (pattern_def_seq);
2937 !gsi_end_p (pi); gsi_next (&pi))
2938 STMT_SLP_TYPE (loop_vinfo->lookup_stmt (gsi_stmt (pi)))(loop_vinfo->lookup_stmt (gsi_stmt (pi)))->slp_type
2939 = loop_vect;
2940 }
2941 }
2942 }
2943 /* Free optimized alias test DDRS. */
2944 LOOP_VINFO_LOWER_BOUNDS (loop_vinfo)(loop_vinfo)->lower_bounds.truncate (0);
2945 LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo)(loop_vinfo)->comp_alias_ddrs.release ();
2946 LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo)(loop_vinfo)->check_unequal_addrs.release ();
2947 /* Reset target cost data. */
2948 delete loop_vinfo->vector_costs;
2949 loop_vinfo->vector_costs = nullptr;
2950 /* Reset accumulated rgroup information. */
2951 release_vec_loop_controls (&LOOP_VINFO_MASKS (loop_vinfo)(loop_vinfo)->masks);
2952 release_vec_loop_controls (&LOOP_VINFO_LENS (loop_vinfo)(loop_vinfo)->lens);
2953 /* Reset assorted flags. */
2954 LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo)(loop_vinfo)->peeling_for_niter = false;
2955 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)(loop_vinfo)->peeling_for_gaps = false;
2956 LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo)(loop_vinfo)->th = 0;
2957 LOOP_VINFO_VERSIONING_THRESHOLD (loop_vinfo)(loop_vinfo)->versioning_threshold = 0;
2958 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)(loop_vinfo)->can_use_partial_vectors_p
2959 = saved_can_use_partial_vectors_p;
2960
2961 goto start_over;
2962}
2963
2964/* Return true if vectorizing a loop using NEW_LOOP_VINFO appears
2965 to be better than vectorizing it using OLD_LOOP_VINFO. Assume that
2966 OLD_LOOP_VINFO is better unless something specifically indicates
2967 otherwise.
2968
2969 Note that this deliberately isn't a partial order. */
2970
2971static bool
2972vect_better_loop_vinfo_p (loop_vec_info new_loop_vinfo,
2973 loop_vec_info old_loop_vinfo)
2974{
2975 struct loop *loop = LOOP_VINFO_LOOP (new_loop_vinfo)(new_loop_vinfo)->loop;
2976 gcc_assert (LOOP_VINFO_LOOP (old_loop_vinfo) == loop)((void)(!((old_loop_vinfo)->loop == loop) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 2976, __FUNCTION__), 0 : 0))
;
2977
2978 poly_int64 new_vf = LOOP_VINFO_VECT_FACTOR (new_loop_vinfo)(new_loop_vinfo)->vectorization_factor;
2979 poly_int64 old_vf = LOOP_VINFO_VECT_FACTOR (old_loop_vinfo)(old_loop_vinfo)->vectorization_factor;
2980
2981 /* Always prefer a VF of loop->simdlen over any other VF. */
2982 if (loop->simdlen)
2983 {
2984 bool new_simdlen_p = known_eq (new_vf, loop->simdlen)(!maybe_ne (new_vf, loop->simdlen));
2985 bool old_simdlen_p = known_eq (old_vf, loop->simdlen)(!maybe_ne (old_vf, loop->simdlen));
2986 if (new_simdlen_p != old_simdlen_p)
2987 return new_simdlen_p;
2988 }
2989
2990 const auto *old_costs = old_loop_vinfo->vector_costs;
2991 const auto *new_costs = new_loop_vinfo->vector_costs;
2992 if (loop_vec_info main_loop = LOOP_VINFO_ORIG_LOOP_INFO (old_loop_vinfo)(old_loop_vinfo)->orig_loop_info)
2993 return new_costs->better_epilogue_loop_than_p (old_costs, main_loop);
2994
2995 return new_costs->better_main_loop_than_p (old_costs);
2996}
2997
2998/* Decide whether to replace OLD_LOOP_VINFO with NEW_LOOP_VINFO. Return
2999 true if we should. */
3000
3001static bool
3002vect_joust_loop_vinfos (loop_vec_info new_loop_vinfo,
3003 loop_vec_info old_loop_vinfo)
3004{
3005 if (!vect_better_loop_vinfo_p (new_loop_vinfo, old_loop_vinfo))
3006 return false;
3007
3008 if (dump_enabled_p ())
3009 dump_printf_loc (MSG_NOTE, vect_location,
3010 "***** Preferring vector mode %s to vector mode %s\n",
3011 GET_MODE_NAME (new_loop_vinfo->vector_mode)mode_name[new_loop_vinfo->vector_mode],
3012 GET_MODE_NAME (old_loop_vinfo->vector_mode)mode_name[old_loop_vinfo->vector_mode]);
3013 return true;
3014}
3015
3016/* Analyze LOOP with VECTOR_MODES[MODE_I] and as epilogue if MAIN_LOOP_VINFO is
3017 not NULL. Set AUTODETECTED_VECTOR_MODE if VOIDmode and advance
3018 MODE_I to the next mode useful to analyze.
3019 Return the loop_vinfo on success and wrapped null on failure. */
3020
3021static opt_loop_vec_info
3022vect_analyze_loop_1 (class loop *loop, vec_info_shared *shared,
3023 const vect_loop_form_info *loop_form_info,
3024 loop_vec_info main_loop_vinfo,
3025 const vector_modes &vector_modes, unsigned &mode_i,
3026 machine_mode &autodetected_vector_mode,
3027 bool &fatal)
3028{
3029 loop_vec_info loop_vinfo
3030 = vect_create_loop_vinfo (loop, shared, loop_form_info, main_loop_vinfo);
3031
3032 machine_mode vector_mode = vector_modes[mode_i];
3033 loop_vinfo->vector_mode = vector_mode;
3034 unsigned int suggested_unroll_factor = 1;
3035 bool slp_done_for_suggested_uf;
3036
3037 /* Run the main analysis. */
3038 opt_result res = vect_analyze_loop_2 (loop_vinfo, fatal,
3039 &suggested_unroll_factor,
3040 slp_done_for_suggested_uf);
3041 if (dump_enabled_p ())
3042 dump_printf_loc (MSG_NOTE, vect_location,
3043 "***** Analysis %s with vector mode %s\n",
3044 res ? "succeeded" : " failed",
3045 GET_MODE_NAME (loop_vinfo->vector_mode)mode_name[loop_vinfo->vector_mode]);
3046
3047 if (!main_loop_vinfo && suggested_unroll_factor > 1)
3048 {
3049 if (dump_enabled_p ())
3050 dump_printf_loc (MSG_NOTE, vect_location,
3051 "***** Re-trying analysis for unrolling"
3052 " with unroll factor %d and slp %s.\n",
3053 suggested_unroll_factor,
3054 slp_done_for_suggested_uf ? "on" : "off");
3055 loop_vec_info unroll_vinfo
3056 = vect_create_loop_vinfo (loop, shared, loop_form_info, main_loop_vinfo);
3057 unroll_vinfo->vector_mode = vector_mode;
3058 unroll_vinfo->suggested_unroll_factor = suggested_unroll_factor;
3059 opt_result new_res = vect_analyze_loop_2 (unroll_vinfo, fatal, NULLnullptr,
3060 slp_done_for_suggested_uf);
3061 if (new_res)
3062 {
3063 delete loop_vinfo;
3064 loop_vinfo = unroll_vinfo;
3065 }
3066 else
3067 delete unroll_vinfo;
3068 }
3069
3070 /* Remember the autodetected vector mode. */
3071 if (vector_mode == VOIDmode((void) 0, E_VOIDmode))
3072 autodetected_vector_mode = loop_vinfo->vector_mode;
3073
3074 /* Advance mode_i, first skipping modes that would result in the
3075 same analysis result. */
3076 while (mode_i + 1 < vector_modes.length ()
3077 && vect_chooses_same_modes_p (loop_vinfo,
3078 vector_modes[mode_i + 1]))
3079 {
3080 if (dump_enabled_p ())
3081 dump_printf_loc (MSG_NOTE, vect_location,
3082 "***** The result for vector mode %s would"
3083 " be the same\n",
3084 GET_MODE_NAME (vector_modes[mode_i + 1])mode_name[vector_modes[mode_i + 1]]);
3085 mode_i += 1;
3086 }
3087 if (mode_i + 1 < vector_modes.length ()
3088 && VECTOR_MODE_P (autodetected_vector_mode)(((enum mode_class) mode_class[autodetected_vector_mode]) == MODE_VECTOR_BOOL
|| ((enum mode_class) mode_class[autodetected_vector_mode]) ==
MODE_VECTOR_INT || ((enum mode_class) mode_class[autodetected_vector_mode
]) == MODE_VECTOR_FLOAT || ((enum mode_class) mode_class[autodetected_vector_mode
]) == MODE_VECTOR_FRACT || ((enum mode_class) mode_class[autodetected_vector_mode
]) == MODE_VECTOR_UFRACT || ((enum mode_class) mode_class[autodetected_vector_mode
]) == MODE_VECTOR_ACCUM || ((enum mode_class) mode_class[autodetected_vector_mode
]) == MODE_VECTOR_UACCUM)
3089 && (related_vector_mode (vector_modes[mode_i + 1],
3090 GET_MODE_INNER (autodetected_vector_mode)(mode_to_inner (autodetected_vector_mode)))
3091 == autodetected_vector_mode)
3092 && (related_vector_mode (autodetected_vector_mode,
3093 GET_MODE_INNER (vector_modes[mode_i + 1])(mode_to_inner (vector_modes[mode_i + 1])))
3094 == vector_modes[mode_i + 1]))
3095 {
3096 if (dump_enabled_p ())
3097 dump_printf_loc (MSG_NOTE, vect_location,
3098 "***** Skipping vector mode %s, which would"
3099 " repeat the analysis for %s\n",
3100 GET_MODE_NAME (vector_modes[mode_i + 1])mode_name[vector_modes[mode_i + 1]],
3101 GET_MODE_NAME (autodetected_vector_mode)mode_name[autodetected_vector_mode]);
3102 mode_i += 1;
3103 }
3104 mode_i++;
3105
3106 if (!res)
3107 {
3108 delete loop_vinfo;
3109 if (fatal)
3110 gcc_checking_assert (main_loop_vinfo == NULL)((void)(!(main_loop_vinfo == nullptr) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 3110, __FUNCTION__), 0 : 0))
;
3111 return opt_loop_vec_info::propagate_failure (res);
3112 }
3113
3114 return opt_loop_vec_info::success (loop_vinfo);
3115}
3116
3117/* Function vect_analyze_loop.
3118
3119 Apply a set of analyses on LOOP, and create a loop_vec_info struct
3120 for it. The different analyses will record information in the
3121 loop_vec_info struct. */
3122opt_loop_vec_info
3123vect_analyze_loop (class loop *loop, vec_info_shared *shared)
3124{
3125 DUMP_VECT_SCOPE ("analyze_loop_nest")auto_dump_scope scope ("analyze_loop_nest", vect_location);
3126
3127 if (loop_outer (loop)
3128 && loop_vec_info_for_loop (loop_outer (loop))
3129 && LOOP_VINFO_VECTORIZABLE_P (loop_vec_info_for_loop (loop_outer (loop)))(loop_vec_info_for_loop (loop_outer (loop)))->vectorizable)
3130 return opt_loop_vec_info::failure_at (vect_location,
3131 "outer-loop already vectorized.\n");
3132
3133 if (!find_loop_nest (loop, &shared->loop_nest))
3134 return opt_loop_vec_info::failure_at
3135 (vect_location,
3136 "not vectorized: loop nest containing two or more consecutive inner"
3137 " loops cannot be vectorized\n");
3138
3139 /* Analyze the loop form. */
3140 vect_loop_form_info loop_form_info;
3141 opt_result res = vect_analyze_loop_form (loop, &loop_form_info);
3142 if (!res)
3143 {
3144 if (dump_enabled_p ())
3145 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3146 "bad loop form.\n");
3147 return opt_loop_vec_info::propagate_failure (res);
3148 }
3149 if (!integer_onep (loop_form_info.assumptions))
3150 {
3151 /* We consider to vectorize this loop by versioning it under
3152 some assumptions. In order to do this, we need to clear
3153 existing information computed by scev and niter analyzer. */
3154 scev_reset_htab ();
3155 free_numbers_of_iterations_estimates (loop);
3156 /* Also set flag for this loop so that following scev and niter
3157 analysis are done under the assumptions. */
3158 loop_constraint_set (loop, LOOP_C_FINITE(1 << 1));
3159 }
3160
3161 auto_vector_modes vector_modes;
3162 /* Autodetect first vector size we try. */
3163 vector_modes.safe_push (VOIDmode((void) 0, E_VOIDmode));
3164 unsigned int autovec_flags
3165 = targetm.vectorize.autovectorize_vector_modes (&vector_modes,
3166 loop->simdlen != 0);
3167 bool pick_lowest_cost_p = ((autovec_flags & VECT_COMPARE_COSTS)
3168 && !unlimited_cost_model (loop));
3169 machine_mode autodetected_vector_mode = VOIDmode((void) 0, E_VOIDmode);
3170 opt_loop_vec_info first_loop_vinfo = opt_loop_vec_info::success (NULLnullptr);
3171 unsigned int mode_i = 0;
3172 unsigned HOST_WIDE_INTlong simdlen = loop->simdlen;
3173
3174 /* Keep track of the VF for each mode. Initialize all to 0 which indicates
3175 a mode has not been analyzed. */
3176 auto_vec<poly_uint64, 8> cached_vf_per_mode;
3177 for (unsigned i = 0; i < vector_modes.length (); ++i)
3178 cached_vf_per_mode.safe_push (0);
3179
3180 /* First determine the main loop vectorization mode, either the first
3181 one that works, starting with auto-detecting the vector mode and then
3182 following the targets order of preference, or the one with the
3183 lowest cost if pick_lowest_cost_p. */
3184 while (1)
3185 {
3186 bool fatal;
3187 unsigned int last_mode_i = mode_i;
3188 /* Set cached VF to -1 prior to analysis, which indicates a mode has
3189 failed. */
3190 cached_vf_per_mode[last_mode_i] = -1;
3191 opt_loop_vec_info loop_vinfo
3192 = vect_analyze_loop_1 (loop, shared, &loop_form_info,
3193 NULLnullptr, vector_modes, mode_i,
3194 autodetected_vector_mode, fatal);
3195 if (fatal)
3196 break;
3197
3198 if (loop_vinfo)
3199 {
3200 /* Analyzis has been successful so update the VF value. The
3201 VF should always be a multiple of unroll_factor and we want to
3202 capture the original VF here. */
3203 cached_vf_per_mode[last_mode_i]
3204 = exact_div (LOOP_VINFO_VECT_FACTOR (loop_vinfo)(loop_vinfo)->vectorization_factor,
3205 loop_vinfo->suggested_unroll_factor);
3206 /* Once we hit the desired simdlen for the first time,
3207 discard any previous attempts. */
3208 if (simdlen
3209 && known_eq (LOOP_VINFO_VECT_FACTOR (loop_vinfo), simdlen)(!maybe_ne ((loop_vinfo)->vectorization_factor, simdlen)))
3210 {
3211 delete first_loop_vinfo;
3212 first_loop_vinfo = opt_loop_vec_info::success (NULLnullptr);
3213 simdlen = 0;
3214 }
3215 else if (pick_lowest_cost_p
3216 && first_loop_vinfo
3217 && vect_joust_loop_vinfos (loop_vinfo, first_loop_vinfo))
3218 {
3219 /* Pick loop_vinfo over first_loop_vinfo. */
3220 delete first_loop_vinfo;
3221 first_loop_vinfo = opt_loop_vec_info::success (NULLnullptr);
3222 }
3223 if (first_loop_vinfo == NULLnullptr)
3224 first_loop_vinfo = loop_vinfo;
3225 else
3226 {
3227 delete loop_vinfo;
3228 loop_vinfo = opt_loop_vec_info::success (NULLnullptr);
3229 }
3230
3231 /* Commit to first_loop_vinfo if we have no reason to try
3232 alternatives. */
3233 if (!simdlen && !pick_lowest_cost_p)
3234 break;
3235 }
3236 if (mode_i == vector_modes.length ()
3237 || autodetected_vector_mode == VOIDmode((void) 0, E_VOIDmode))
3238 break;
3239
3240 /* Try the next biggest vector size. */
3241 if (dump_enabled_p ())
3242 dump_printf_loc (MSG_NOTE, vect_location,
3243 "***** Re-trying analysis with vector mode %s\n",
3244 GET_MODE_NAME (vector_modes[mode_i])mode_name[vector_modes[mode_i]]);
3245 }
3246 if (!first_loop_vinfo)
3247 return opt_loop_vec_info::propagate_failure (res);
3248
3249 if (dump_enabled_p ())
3250 dump_printf_loc (MSG_NOTE, vect_location,
3251 "***** Choosing vector mode %s\n",
3252 GET_MODE_NAME (first_loop_vinfo->vector_mode)mode_name[first_loop_vinfo->vector_mode]);
3253
3254 /* Only vectorize epilogues if PARAM_VECT_EPILOGUES_NOMASK is
3255 enabled, SIMDUID is not set, it is the innermost loop and we have
3256 either already found the loop's SIMDLEN or there was no SIMDLEN to
3257 begin with.
3258 TODO: Enable epilogue vectorization for loops with SIMDUID set. */
3259 bool vect_epilogues = (!simdlen
3260 && loop->inner == NULLnullptr
3261 && param_vect_epilogues_nomaskglobal_options.x_param_vect_epilogues_nomask
3262 && LOOP_VINFO_PEELING_FOR_NITER (first_loop_vinfo)(first_loop_vinfo)->peeling_for_niter
3263 && !loop->simduid);
3264 if (!vect_epilogues)
3265 return first_loop_vinfo;
3266
3267 /* Now analyze first_loop_vinfo for epilogue vectorization. */
3268 poly_uint64 lowest_th = LOOP_VINFO_VERSIONING_THRESHOLD (first_loop_vinfo)(first_loop_vinfo)->versioning_threshold;
3269
3270 /* For epilogues start the analysis from the first mode. The motivation
3271 behind starting from the beginning comes from cases where the VECTOR_MODES
3272 array may contain length-agnostic and length-specific modes. Their
3273 ordering is not guaranteed, so we could end up picking a mode for the main
3274 loop that is after the epilogue's optimal mode. */
3275 vector_modes[0] = autodetected_vector_mode;
3276 mode_i = 0;
3277
3278 bool supports_partial_vectors =
3279 partial_vectors_supported_p () && param_vect_partial_vector_usageglobal_options.x_param_vect_partial_vector_usage != 0;
3280 poly_uint64 first_vinfo_vf = LOOP_VINFO_VECT_FACTOR (first_loop_vinfo)(first_loop_vinfo)->vectorization_factor;
3281
3282 while (1)
3283 {
3284 /* If the target does not support partial vectors we can shorten the
3285 number of modes to analyze for the epilogue as we know we can't pick a
3286 mode that would lead to a VF at least as big as the
3287 FIRST_VINFO_VF. */
3288 if (!supports_partial_vectors
3289 && maybe_ge (cached_vf_per_mode[mode_i], first_vinfo_vf)maybe_le (first_vinfo_vf, cached_vf_per_mode[mode_i]))
3290 {
3291 mode_i++;
3292 if (mode_i == vector_modes.length ())
3293 break;
3294 continue;
3295 }
3296
3297 if (dump_enabled_p ())
3298 dump_printf_loc (MSG_NOTE, vect_location,
3299 "***** Re-trying epilogue analysis with vector "
3300 "mode %s\n", GET_MODE_NAME (vector_modes[mode_i])mode_name[vector_modes[mode_i]]);
3301
3302 bool fatal;
3303 opt_loop_vec_info loop_vinfo
3304 = vect_analyze_loop_1 (loop, shared, &loop_form_info,
3305 first_loop_vinfo,
3306 vector_modes, mode_i,
3307 autodetected_vector_mode, fatal);
3308 if (fatal)
3309 break;
3310
3311 if (loop_vinfo)
3312 {
3313 if (pick_lowest_cost_p)
3314 {
3315 /* Keep trying to roll back vectorization attempts while the
3316 loop_vec_infos they produced were worse than this one. */
3317 vec<loop_vec_info> &vinfos = first_loop_vinfo->epilogue_vinfos;
3318 while (!vinfos.is_empty ()
3319 && vect_joust_loop_vinfos (loop_vinfo, vinfos.last ()))
3320 {
3321 gcc_assert (vect_epilogues)((void)(!(vect_epilogues) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 3321, __FUNCTION__), 0 : 0))
;
3322 delete vinfos.pop ();
3323 }
3324 }
3325 /* For now only allow one epilogue loop. */
3326 if (first_loop_vinfo->epilogue_vinfos.is_empty ())
3327 {
3328 first_loop_vinfo->epilogue_vinfos.safe_push (loop_vinfo);
3329 poly_uint64 th = LOOP_VINFO_VERSIONING_THRESHOLD (loop_vinfo)(loop_vinfo)->versioning_threshold;
3330 gcc_assert (!LOOP_REQUIRES_VERSIONING (loop_vinfo)((void)(!(!(((loop_vinfo)->may_misalign_stmts.length () >
0) || ((loop_vinfo)->comp_alias_ddrs.length () > 0 || (
loop_vinfo)->check_unequal_addrs.length () > 0 || (loop_vinfo
)->lower_bounds.length () > 0) || ((loop_vinfo)->num_iters_assumptions
) || ((loop_vinfo)->simd_if_cond)) || maybe_ne (lowest_th,
0U)) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 3331, __FUNCTION__), 0 : 0))
3331 || maybe_ne (lowest_th, 0U))((void)(!(!(((loop_vinfo)->may_misalign_stmts.length () >
0) || ((loop_vinfo)->comp_alias_ddrs.length () > 0 || (
loop_vinfo)->check_unequal_addrs.length () > 0 || (loop_vinfo
)->lower_bounds.length () > 0) || ((loop_vinfo)->num_iters_assumptions
) || ((loop_vinfo)->simd_if_cond)) || maybe_ne (lowest_th,
0U)) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 3331, __FUNCTION__), 0 : 0))
;
3332 /* Keep track of the known smallest versioning
3333 threshold. */
3334 if (ordered_p (lowest_th, th))
3335 lowest_th = ordered_min (lowest_th, th);
3336 }
3337 else
3338 {
3339 delete loop_vinfo;
3340 loop_vinfo = opt_loop_vec_info::success (NULLnullptr);
3341 }
3342
3343 /* For now only allow one epilogue loop, but allow
3344 pick_lowest_cost_p to replace it, so commit to the
3345 first epilogue if we have no reason to try alternatives. */
3346 if (!pick_lowest_cost_p)
3347 break;
3348 }
3349
3350 if (mode_i == vector_modes.length ())
3351 break;
3352
3353 }
3354
3355 if (!first_loop_vinfo->epilogue_vinfos.is_empty ())
3356 {
3357 LOOP_VINFO_VERSIONING_THRESHOLD (first_loop_vinfo)(first_loop_vinfo)->versioning_threshold = lowest_th;
3358 if (dump_enabled_p ())
3359 dump_printf_loc (MSG_NOTE, vect_location,
3360 "***** Choosing epilogue vector mode %s\n",
3361 GET_MODE_NAMEmode_name[first_loop_vinfo->epilogue_vinfos[0]->vector_mode
]
3362 (first_loop_vinfo->epilogue_vinfos[0]->vector_mode)mode_name[first_loop_vinfo->epilogue_vinfos[0]->vector_mode
]
);
3363 }
3364
3365 return first_loop_vinfo;
3366}
3367
3368/* Return true if there is an in-order reduction function for CODE, storing
3369 it in *REDUC_FN if so. */
3370
3371static bool
3372fold_left_reduction_fn (code_helper code, internal_fn *reduc_fn)
3373{
3374 if (code == PLUS_EXPR)
3375 {
3376 *reduc_fn = IFN_FOLD_LEFT_PLUS;
3377 return true;
3378 }
3379 return false;
3380}
3381
3382/* Function reduction_fn_for_scalar_code
3383
3384 Input:
3385 CODE - tree_code of a reduction operations.
3386
3387 Output:
3388 REDUC_FN - the corresponding internal function to be used to reduce the
3389 vector of partial results into a single scalar result, or IFN_LAST
3390 if the operation is a supported reduction operation, but does not have
3391 such an internal function.
3392
3393 Return FALSE if CODE currently cannot be vectorized as reduction. */
3394
3395bool
3396reduction_fn_for_scalar_code (code_helper code, internal_fn *reduc_fn)
3397{
3398 if (code.is_tree_code ())
3399 switch (tree_code (code))
3400 {
3401 case MAX_EXPR:
3402 *reduc_fn = IFN_REDUC_MAX;
3403 return true;
3404
3405 case MIN_EXPR:
3406 *reduc_fn = IFN_REDUC_MIN;
3407 return true;
3408
3409 case PLUS_EXPR:
3410 *reduc_fn = IFN_REDUC_PLUS;
3411 return true;
3412
3413 case BIT_AND_EXPR:
3414 *reduc_fn = IFN_REDUC_AND;
3415 return true;
3416
3417 case BIT_IOR_EXPR:
3418 *reduc_fn = IFN_REDUC_IOR;
3419 return true;
3420
3421 case BIT_XOR_EXPR:
3422 *reduc_fn = IFN_REDUC_XOR;
3423 return true;
3424
3425 case MULT_EXPR:
3426 case MINUS_EXPR:
3427 *reduc_fn = IFN_LAST;
3428 return true;
3429
3430 default:
3431 return false;
3432 }
3433 else
3434 switch (combined_fn (code))
3435 {
3436 CASE_CFN_FMAXcase CFN_FMAX: case CFN_BUILT_IN_FMAXF: case CFN_BUILT_IN_FMAX
: case CFN_BUILT_IN_FMAXL
:
3437 *reduc_fn = IFN_REDUC_FMAX;
3438 return true;
3439
3440 CASE_CFN_FMINcase CFN_FMIN: case CFN_BUILT_IN_FMINF: case CFN_BUILT_IN_FMIN
: case CFN_BUILT_IN_FMINL
:
3441 *reduc_fn = IFN_REDUC_FMIN;
3442 return true;
3443
3444 default:
3445 return false;
3446 }
3447}
3448
3449/* If there is a neutral value X such that a reduction would not be affected
3450 by the introduction of additional X elements, return that X, otherwise
3451 return null. CODE is the code of the reduction and SCALAR_TYPE is type
3452 of the scalar elements. If the reduction has just a single initial value
3453 then INITIAL_VALUE is that value, otherwise it is null. */
3454
3455tree
3456neutral_op_for_reduction (tree scalar_type, code_helper code,
3457 tree initial_value)
3458{
3459 if (code.is_tree_code ())
3460 switch (tree_code (code))
3461 {
3462 case WIDEN_SUM_EXPR:
3463 case DOT_PROD_EXPR:
3464 case SAD_EXPR:
3465 case PLUS_EXPR:
3466 case MINUS_EXPR:
3467 case BIT_IOR_EXPR:
3468 case BIT_XOR_EXPR:
3469 return build_zero_cst (scalar_type);
3470
3471 case MULT_EXPR:
3472 return build_one_cst (scalar_type);
3473
3474 case BIT_AND_EXPR:
3475 return build_all_ones_cst (scalar_type);
3476
3477 case MAX_EXPR:
3478 case MIN_EXPR:
3479 return initial_value;
3480
3481 default:
3482 return NULL_TREE(tree) nullptr;
3483 }
3484 else
3485 switch (combined_fn (code))
3486 {
3487 CASE_CFN_FMINcase CFN_FMIN: case CFN_BUILT_IN_FMINF: case CFN_BUILT_IN_FMIN
: case CFN_BUILT_IN_FMINL
:
3488 CASE_CFN_FMAXcase CFN_FMAX: case CFN_BUILT_IN_FMAXF: case CFN_BUILT_IN_FMAX
: case CFN_BUILT_IN_FMAXL
:
3489 return initial_value;
3490
3491 default:
3492 return NULL_TREE(tree) nullptr;
3493 }
3494}
3495
3496/* Error reporting helper for vect_is_simple_reduction below. GIMPLE statement
3497 STMT is printed with a message MSG. */
3498
3499static void
3500report_vect_op (dump_flags_t msg_type, gimple *stmt, const char *msg)
3501{
3502 dump_printf_loc (msg_type, vect_location, "%s%G", msg, stmt);
3503}
3504
3505/* Return true if we need an in-order reduction for operation CODE
3506 on type TYPE. NEED_WRAPPING_INTEGRAL_OVERFLOW is true if integer
3507 overflow must wrap. */
3508
3509bool
3510needs_fold_left_reduction_p (tree type, code_helper code)
3511{
3512 /* CHECKME: check for !flag_finite_math_only too? */
3513 if (SCALAR_FLOAT_TYPE_P (type)(((enum tree_code) (type)->base.code) == REAL_TYPE))
3514 {
3515 if (code.is_tree_code ())
3516 switch (tree_code (code))
3517 {
3518 case MIN_EXPR:
3519 case MAX_EXPR:
3520 return false;
3521
3522 default:
3523 return !flag_associative_mathglobal_options.x_flag_associative_math;
3524 }
3525 else
3526 switch (combined_fn (code))
3527 {
3528 CASE_CFN_FMINcase CFN_FMIN: case CFN_BUILT_IN_FMINF: case CFN_BUILT_IN_FMIN
: case CFN_BUILT_IN_FMINL
:
3529 CASE_CFN_FMAXcase CFN_FMAX: case CFN_BUILT_IN_FMAXF: case CFN_BUILT_IN_FMAX
: case CFN_BUILT_IN_FMAXL
:
3530 return false;
3531
3532 default:
3533 return !flag_associative_mathglobal_options.x_flag_associative_math;
3534 }
3535 }
3536
3537 if (INTEGRAL_TYPE_P (type)(((enum tree_code) (type)->base.code) == ENUMERAL_TYPE || (
(enum tree_code) (type)->base.code) == BOOLEAN_TYPE || ((enum
tree_code) (type)->base.code) == INTEGER_TYPE)
)
3538 return (!code.is_tree_code ()
3539 || !operation_no_trapping_overflow (type, tree_code (code)));
3540
3541 if (SAT_FIXED_POINT_TYPE_P (type)(((enum tree_code) (type)->base.code) == FIXED_POINT_TYPE &&
((tree_not_check4 ((type), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 3541, __FUNCTION__, (RECORD_TYPE), (UNION_TYPE), (QUAL_UNION_TYPE
), (ARRAY_TYPE)))->base.u.bits.saturating_flag))
)
3542 return true;
3543
3544 return false;
3545}
3546
3547/* Return true if the reduction PHI in LOOP with latch arg LOOP_ARG and
3548 has a handled computation expression. Store the main reduction
3549 operation in *CODE. */
3550
3551static bool
3552check_reduction_path (dump_user_location_t loc, loop_p loop, gphi *phi,
3553 tree loop_arg, code_helper *code,
3554 vec<std::pair<ssa_op_iter, use_operand_p> > &path)
3555{
3556 auto_bitmap visited;
3557 tree lookfor = PHI_RESULT (phi)get_def_from_ptr (gimple_phi_result_ptr (phi));
3558 ssa_op_iter curri;
3559 use_operand_p curr = op_iter_init_phiuse (&curri, phi, SSA_OP_USE0x01);
3560 while (USE_FROM_PTR (curr)get_use_from_ptr (curr) != loop_arg)
3561 curr = op_iter_next_use (&curri);
3562 curri.i = curri.numops;
3563 do
3564 {
3565 path.safe_push (std::make_pair (curri, curr));
3566 tree use = USE_FROM_PTR (curr)get_use_from_ptr (curr);
3567 if (use == lookfor)
3568 break;
3569 gimple *def = SSA_NAME_DEF_STMT (use)(tree_check ((use), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 3569, __FUNCTION__, (SSA_NAME)))->ssa_name.def_stmt
;
3570 if (gimple_nop_p (def)
3571 || ! flow_bb_inside_loop_p (loop, gimple_bb (def)))
3572 {
3573pop:
3574 do
3575 {
3576 std::pair<ssa_op_iter, use_operand_p> x = path.pop ();
3577 curri = x.first;
3578 curr = x.second;
3579 do
3580 curr = op_iter_next_use (&curri);
3581 /* Skip already visited or non-SSA operands (from iterating
3582 over PHI args). */
3583 while (curr != NULL_USE_OPERAND_P((use_operand_p)nullptr)
3584 && (TREE_CODE (USE_FROM_PTR (curr))((enum tree_code) (get_use_from_ptr (curr))->base.code) != SSA_NAME
3585 || ! bitmap_set_bit (visited,
3586 SSA_NAME_VERSION(tree_check ((get_use_from_ptr (curr)), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 3587, __FUNCTION__, (SSA_NAME)))->base.u.version
3587 (USE_FROM_PTR (curr))(tree_check ((get_use_from_ptr (curr)), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 3587, __FUNCTION__, (SSA_NAME)))->base.u.version
)));
3588 }
3589 while (curr == NULL_USE_OPERAND_P((use_operand_p)nullptr) && ! path.is_empty ());
3590 if (curr == NULL_USE_OPERAND_P((use_operand_p)nullptr))
3591 break;
3592 }
3593 else
3594 {
3595 if (gimple_code (def) == GIMPLE_PHI)
3596 curr = op_iter_init_phiuse (&curri, as_a <gphi *>(def), SSA_OP_USE0x01);
3597 else
3598 curr = op_iter_init_use (&curri, def, SSA_OP_USE0x01);
3599 while (curr != NULL_USE_OPERAND_P((use_operand_p)nullptr)
3600 && (TREE_CODE (USE_FROM_PTR (curr))((enum tree_code) (get_use_from_ptr (curr))->base.code) != SSA_NAME
3601 || ! bitmap_set_bit (visited,
3602 SSA_NAME_VERSION(tree_check ((get_use_from_ptr (curr)), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 3603, __FUNCTION__, (SSA_NAME)))->base.u.version
3603 (USE_FROM_PTR (curr))(tree_check ((get_use_from_ptr (curr)), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 3603, __FUNCTION__, (SSA_NAME)))->base.u.version
)))
3604 curr = op_iter_next_use (&curri);
3605 if (curr == NULL_USE_OPERAND_P((use_operand_p)nullptr))
3606 goto pop;
3607 }
3608 }
3609 while (1);
3610 if (dump_file && (dump_flags & TDF_DETAILS))
3611 {
3612 dump_printf_loc (MSG_NOTE, loc, "reduction path: ");
3613 unsigned i;
3614 std::pair<ssa_op_iter, use_operand_p> *x;
3615 FOR_EACH_VEC_ELT (path, i, x)for (i = 0; (path).iterate ((i), &(x)); ++(i))
3616 dump_printf (MSG_NOTE, "%T ", USE_FROM_PTR (x->second)get_use_from_ptr (x->second));
3617 dump_printf (MSG_NOTE, "\n");
3618 }
3619
3620 /* Check whether the reduction path detected is valid. */
3621 bool fail = path.length () == 0;
3622 bool neg = false;
3623 int sign = -1;
3624 *code = ERROR_MARK;
3625 for (unsigned i = 1; i < path.length (); ++i)
3626 {
3627 gimple *use_stmt = USE_STMT (path[i].second)(path[i].second)->loc.stmt;
3628 gimple_match_op op;
3629 if (!gimple_extract_op (use_stmt, &op))
3630 {
3631 fail = true;
3632 break;
3633 }
3634 unsigned int opi = op.num_ops;
3635 if (gassign *assign = dyn_cast<gassign *> (use_stmt))
3636 {
3637 /* The following make sure we can compute the operand index
3638 easily plus it mostly disallows chaining via COND_EXPR condition
3639 operands. */
3640 for (opi = 0; opi < op.num_ops; ++opi)
3641 if (gimple_assign_rhs1_ptr (assign) + opi == path[i].second->use)
3642 break;
3643 }
3644 else if (gcall *call = dyn_cast<gcall *> (use_stmt))
3645 {
3646 for (opi = 0; opi < op.num_ops; ++opi)
3647 if (gimple_call_arg_ptr (call, opi) == path[i].second->use)
3648 break;
3649 }
3650 if (opi == op.num_ops)
3651 {
3652 fail = true;
3653 break;
3654 }
3655 op.code = canonicalize_code (op.code, op.type);
3656 if (op.code == MINUS_EXPR)
3657 {
3658 op.code = PLUS_EXPR;
3659 /* Track whether we negate the reduction value each iteration. */
3660 if (op.ops[1] == op.ops[opi])
3661 neg = ! neg;
3662 }
3663 if (CONVERT_EXPR_CODE_P (op.code)((op.code) == NOP_EXPR || (op.code) == CONVERT_EXPR)
3664 && tree_nop_conversion_p (op.type, TREE_TYPE (op.ops[0])((contains_struct_check ((op.ops[0]), (TS_TYPED), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 3664, __FUNCTION__))->typed.type)
))
3665 ;
3666 else if (*code == ERROR_MARK)
3667 {
3668 *code = op.code;
3669 sign = TYPE_SIGN (op.type)((signop) ((tree_class_check ((op.type), (tcc_type), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 3669, __FUNCTION__))->base.u.bits.unsigned_flag))
;
3670 }
3671 else if (op.code != *code)
3672 {
3673 fail = true;
3674 break;
3675 }
3676 else if ((op.code == MIN_EXPR
3677 || op.code == MAX_EXPR)
3678 && sign != TYPE_SIGN (op.type)((signop) ((tree_class_check ((op.type), (tcc_type), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 3678, __FUNCTION__))->base.u.bits.unsigned_flag))
)
3679 {
3680 fail = true;
3681 break;
3682 }
3683 /* Check there's only a single stmt the op is used on. For the
3684 not value-changing tail and the last stmt allow out-of-loop uses.
3685 ??? We could relax this and handle arbitrary live stmts by
3686 forcing a scalar epilogue for example. */
3687 imm_use_iterator imm_iter;
3688 gimple *op_use_stmt;
3689 unsigned cnt = 0;
3690 FOR_EACH_IMM_USE_STMT (op_use_stmt, imm_iter, op.ops[opi])for (struct auto_end_imm_use_stmt_traverse auto_end_imm_use_stmt_traverse
((((op_use_stmt) = first_imm_use_stmt (&(imm_iter), (op.
ops[opi]))), &(imm_iter))); !end_imm_use_stmt_p (&(imm_iter
)); (void) ((op_use_stmt) = next_imm_use_stmt (&(imm_iter
))))
3691 if (!is_gimple_debug (op_use_stmt)
3692 && (*code != ERROR_MARK
3693 || flow_bb_inside_loop_p (loop, gimple_bb (op_use_stmt))))
3694 {
3695 /* We want to allow x + x but not x < 1 ? x : 2. */
3696 if (is_gimple_assign (op_use_stmt)
3697 && gimple_assign_rhs_code (op_use_stmt) == COND_EXPR)
3698 {
3699 use_operand_p use_p;
3700 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)for ((use_p) = first_imm_use_on_stmt (&(imm_iter)); !end_imm_use_on_stmt_p
(&(imm_iter)); (void) ((use_p) = next_imm_use_on_stmt (&
(imm_iter))))
3701 cnt++;
3702 }
3703 else
3704 cnt++;
3705 }
3706 if (cnt != 1)
3707 {
3708 fail = true;
3709 break;
3710 }
3711 }
3712 return ! fail && ! neg && *code != ERROR_MARK;
3713}
3714
3715bool
3716check_reduction_path (dump_user_location_t loc, loop_p loop, gphi *phi,
3717 tree loop_arg, enum tree_code code)
3718{
3719 auto_vec<std::pair<ssa_op_iter, use_operand_p> > path;
3720 code_helper code_;
3721 return (check_reduction_path (loc, loop, phi, loop_arg, &code_, path)
3722 && code_ == code);
3723}
3724
3725
3726
3727/* Function vect_is_simple_reduction
3728
3729 (1) Detect a cross-iteration def-use cycle that represents a simple
3730 reduction computation. We look for the following pattern:
3731
3732 loop_header:
3733 a1 = phi < a0, a2 >
3734 a3 = ...
3735 a2 = operation (a3, a1)
3736
3737 or
3738
3739 a3 = ...
3740 loop_header:
3741 a1 = phi < a0, a2 >
3742 a2 = operation (a3, a1)
3743
3744 such that:
3745 1. operation is commutative and associative and it is safe to
3746 change the order of the computation
3747 2. no uses for a2 in the loop (a2 is used out of the loop)
3748 3. no uses of a1 in the loop besides the reduction operation
3749 4. no uses of a1 outside the loop.
3750
3751 Conditions 1,4 are tested here.
3752 Conditions 2,3 are tested in vect_mark_stmts_to_be_vectorized.
3753
3754 (2) Detect a cross-iteration def-use cycle in nested loops, i.e.,
3755 nested cycles.
3756
3757 (3) Detect cycles of phi nodes in outer-loop vectorization, i.e., double
3758 reductions:
3759
3760 a1 = phi < a0, a2 >
3761 inner loop (def of a3)
3762 a2 = phi < a3 >
3763
3764 (4) Detect condition expressions, ie:
3765 for (int i = 0; i < N; i++)
3766 if (a[i] < val)
3767 ret_val = a[i];
3768
3769*/
3770
3771static stmt_vec_info
3772vect_is_simple_reduction (loop_vec_info loop_info, stmt_vec_info phi_info,
3773 bool *double_reduc, bool *reduc_chain_p, bool slp)
3774{
3775 gphi *phi = as_a <gphi *> (phi_info->stmt);
3776 gimple *phi_use_stmt = NULLnullptr;
3777 imm_use_iterator imm_iter;
3778 use_operand_p use_p;
3779
3780 *double_reduc = false;
3781 *reduc_chain_p = false;
3782 STMT_VINFO_REDUC_TYPE (phi_info)(phi_info)->reduc_type = TREE_CODE_REDUCTION;
3783
3784 tree phi_name = PHI_RESULT (phi)get_def_from_ptr (gimple_phi_result_ptr (phi));
3785 /* ??? If there are no uses of the PHI result the inner loop reduction
3786 won't be detected as possibly double-reduction by vectorizable_reduction
3787 because that tries to walk the PHI arg from the preheader edge which
3788 can be constant. See PR60382. */
3789 if (has_zero_uses (phi_name))
3790 return NULLnullptr;
3791 class loop *loop = (gimple_bb (phi))->loop_father;
3792 unsigned nphi_def_loop_uses = 0;
3793 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, phi_name)for ((use_p) = first_readonly_imm_use (&(imm_iter), (phi_name
)); !end_readonly_imm_use_p (&(imm_iter)); (void) ((use_p
) = next_readonly_imm_use (&(imm_iter))))
3794 {
3795 gimple *use_stmt = USE_STMT (use_p)(use_p)->loc.stmt;
3796 if (is_gimple_debug (use_stmt))
3797 continue;
3798
3799 if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
3800 {
3801 if (dump_enabled_p ())
3802 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3803 "intermediate value used outside loop.\n");
3804
3805 return NULLnullptr;
3806 }
3807
3808 nphi_def_loop_uses++;
3809 phi_use_stmt = use_stmt;
3810 }
3811
3812 tree latch_def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop))gimple_phi_arg_def (((phi)), ((loop_latch_edge (loop))->dest_idx
))
;
3813 if (TREE_CODE (latch_def)((enum tree_code) (latch_def)->base.code) != SSA_NAME)
3814 {
3815 if (dump_enabled_p ())
3816 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3817 "reduction: not ssa_name: %T\n", latch_def);
3818 return NULLnullptr;
3819 }
3820
3821 stmt_vec_info def_stmt_info = loop_info->lookup_def (latch_def);
3822 if (!def_stmt_info
3823 || !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt_info->stmt)))
3824 return NULLnullptr;
3825
3826 bool nested_in_vect_loop
3827 = flow_loop_nested_p (LOOP_VINFO_LOOP (loop_info)(loop_info)->loop, loop);
3828 unsigned nlatch_def_loop_uses = 0;
3829 auto_vec<gphi *, 3> lcphis;
3830 bool inner_loop_of_double_reduc = false;
3831 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, latch_def)for ((use_p) = first_readonly_imm_use (&(imm_iter), (latch_def
)); !end_readonly_imm_use_p (&(imm_iter)); (void) ((use_p
) = next_readonly_imm_use (&(imm_iter))))
3832 {
3833 gimple *use_stmt = USE_STMT (use_p)(use_p)->loc.stmt;
3834 if (is_gimple_debug (use_stmt))
3835 continue;
3836 if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
3837 nlatch_def_loop_uses++;
3838 else
3839 {
3840 /* We can have more than one loop-closed PHI. */
3841 lcphis.safe_push (as_a <gphi *> (use_stmt));
3842 if (nested_in_vect_loop
3843 && (STMT_VINFO_DEF_TYPE (loop_info->lookup_stmt (use_stmt))(loop_info->lookup_stmt (use_stmt))->def_type
3844 == vect_double_reduction_def))
3845 inner_loop_of_double_reduc = true;
3846 }
3847 }
3848
3849 /* If we are vectorizing an inner reduction we are executing that
3850 in the original order only in case we are not dealing with a
3851 double reduction. */
3852 if (nested_in_vect_loop && !inner_loop_of_double_reduc)
3853 {
3854 if (dump_enabled_p ())
3855 report_vect_op (MSG_NOTE, def_stmt_info->stmt,
3856 "detected nested cycle: ");
3857 return def_stmt_info;
3858 }
3859
3860 /* When the inner loop of a double reduction ends up with more than
3861 one loop-closed PHI we have failed to classify alternate such
3862 PHIs as double reduction, leading to wrong code. See PR103237. */
3863 if (inner_loop_of_double_reduc && lcphis.length () != 1)
3864 {
3865 if (dump_enabled_p ())
3866 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3867 "unhandle double reduction\n");
3868 return NULLnullptr;
3869 }
3870
3871 /* If this isn't a nested cycle or if the nested cycle reduction value
3872 is used ouside of the inner loop we cannot handle uses of the reduction
3873 value. */
3874 if (nlatch_def_loop_uses > 1 || nphi_def_loop_uses > 1)
3875 {
3876 if (dump_enabled_p ())
3877 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3878 "reduction used in loop.\n");
3879 return NULLnullptr;
3880 }
3881
3882 /* If DEF_STMT is a phi node itself, we expect it to have a single argument
3883 defined in the inner loop. */
3884 if (gphi *def_stmt = dyn_cast <gphi *> (def_stmt_info->stmt))
3885 {
3886 tree op1 = PHI_ARG_DEF (def_stmt, 0)gimple_phi_arg_def ((def_stmt), (0));
3887 if (gimple_phi_num_args (def_stmt) != 1
3888 || TREE_CODE (op1)((enum tree_code) (op1)->base.code) != SSA_NAME)
3889 {
3890 if (dump_enabled_p ())
3891 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3892 "unsupported phi node definition.\n");
3893
3894 return NULLnullptr;
3895 }
3896
3897 /* Verify there is an inner cycle composed of the PHI phi_use_stmt
3898 and the latch definition op1. */
3899 gimple *def1 = SSA_NAME_DEF_STMT (op1)(tree_check ((op1), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 3899, __FUNCTION__, (SSA_NAME)))->ssa_name.def_stmt
;
3900 if (gimple_bb (def1)
3901 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
3902 && loop->inner
3903 && flow_bb_inside_loop_p (loop->inner, gimple_bb (def1))
3904 && (is_gimple_assign (def1) || is_gimple_call (def1))
3905 && is_a <gphi *> (phi_use_stmt)
3906 && flow_bb_inside_loop_p (loop->inner, gimple_bb (phi_use_stmt))
3907 && (op1 == PHI_ARG_DEF_FROM_EDGE (phi_use_stmt,gimple_phi_arg_def (((phi_use_stmt)), ((loop_latch_edge (loop
->inner))->dest_idx))
3908 loop_latch_edge (loop->inner))gimple_phi_arg_def (((phi_use_stmt)), ((loop_latch_edge (loop
->inner))->dest_idx))
))
3909 {
3910 if (dump_enabled_p ())
3911 report_vect_op (MSG_NOTE, def_stmt,
3912 "detected double reduction: ");
3913
3914 *double_reduc = true;
3915 return def_stmt_info;
3916 }
3917
3918 return NULLnullptr;
3919 }
3920
3921 /* Look for the expression computing latch_def from then loop PHI result. */
3922 auto_vec<std::pair<ssa_op_iter, use_operand_p> > path;
3923 code_helper code;
3924 if (check_reduction_path (vect_location, loop, phi, latch_def, &code,
3925 path))
3926 {
3927 STMT_VINFO_REDUC_CODE (phi_info)(phi_info)->reduc_code = code;
3928 if (code == COND_EXPR && !nested_in_vect_loop)
3929 STMT_VINFO_REDUC_TYPE (phi_info)(phi_info)->reduc_type = COND_REDUCTION;
3930
3931 /* Fill in STMT_VINFO_REDUC_IDX and gather stmts for an SLP
3932 reduction chain for which the additional restriction is that
3933 all operations in the chain are the same. */
3934 auto_vec<stmt_vec_info, 8> reduc_chain;
3935 unsigned i;
3936 bool is_slp_reduc = !nested_in_vect_loop && code != COND_EXPR;
3937 for (i = path.length () - 1; i >= 1; --i)
3938 {
3939 gimple *stmt = USE_STMT (path[i].second)(path[i].second)->loc.stmt;
3940 stmt_vec_info stmt_info = loop_info->lookup_stmt (stmt);
3941 gimple_match_op op;
3942 if (!gimple_extract_op (stmt, &op))
3943 gcc_unreachable ()(fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 3943, __FUNCTION__))
;
3944 if (gassign *assign = dyn_cast<gassign *> (stmt))
3945 STMT_VINFO_REDUC_IDX (stmt_info)(stmt_info)->reduc_idx
3946 = path[i].second->use - gimple_assign_rhs1_ptr (assign);
3947 else
3948 {
3949 gcall *call = as_a<gcall *> (stmt);
3950 STMT_VINFO_REDUC_IDX (stmt_info)(stmt_info)->reduc_idx
3951 = path[i].second->use - gimple_call_arg_ptr (call, 0);
3952 }
3953 bool leading_conversion = (CONVERT_EXPR_CODE_P (op.code)((op.code) == NOP_EXPR || (op.code) == CONVERT_EXPR)
3954 && (i == 1 || i == path.length () - 1));
3955 if ((op.code != code && !leading_conversion)
3956 /* We can only handle the final value in epilogue
3957 generation for reduction chains. */
3958 || (i != 1 && !has_single_use (gimple_get_lhs (stmt))))
3959 is_slp_reduc = false;
3960 /* For reduction chains we support a trailing/leading
3961 conversions. We do not store those in the actual chain. */
3962 if (leading_conversion)
3963 continue;
3964 reduc_chain.safe_push (stmt_info);
3965 }
3966 if (slp && is_slp_reduc && reduc_chain.length () > 1)
3967 {
3968 for (unsigned i = 0; i < reduc_chain.length () - 1; ++i)
3969 {
3970 REDUC_GROUP_FIRST_ELEMENT (reduc_chain[i])(((void)(!(!(reduc_chain[i])->dr_aux.dr) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 3970, __FUNCTION__), 0 : 0)), (reduc_chain[i])->first_element
)
= reduc_chain[0];
3971 REDUC_GROUP_NEXT_ELEMENT (reduc_chain[i])(((void)(!(!(reduc_chain[i])->dr_aux.dr) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 3971, __FUNCTION__), 0 : 0)), (reduc_chain[i])->next_element
)
= reduc_chain[i+1];
3972 }
3973 REDUC_GROUP_FIRST_ELEMENT (reduc_chain.last ())(((void)(!(!(reduc_chain.last ())->dr_aux.dr) ? fancy_abort
("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 3973, __FUNCTION__), 0 : 0)), (reduc_chain.last ())->first_element
)
= reduc_chain[0];
3974 REDUC_GROUP_NEXT_ELEMENT (reduc_chain.last ())(((void)(!(!(reduc_chain.last ())->dr_aux.dr) ? fancy_abort
("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 3974, __FUNCTION__), 0 : 0)), (reduc_chain.last ())->next_element
)
= NULLnullptr;
3975
3976 /* Save the chain for further analysis in SLP detection. */
3977 LOOP_VINFO_REDUCTION_CHAINS (loop_info)(loop_info)->reduction_chains.safe_push (reduc_chain[0]);
3978 REDUC_GROUP_SIZE (reduc_chain[0])(((void)(!(!(reduc_chain[0])->dr_aux.dr) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 3978, __FUNCTION__), 0 : 0)), (reduc_chain[0])->size)
= reduc_chain.length ();
3979
3980 *reduc_chain_p = true;
3981 if (dump_enabled_p ())
3982 dump_printf_loc (MSG_NOTE, vect_location,
3983 "reduction: detected reduction chain\n");
3984 }
3985 else if (dump_enabled_p ())
3986 dump_printf_loc (MSG_NOTE, vect_location,
3987 "reduction: detected reduction\n");
3988
3989 return def_stmt_info;
3990 }
3991
3992 if (dump_enabled_p ())
3993 dump_printf_loc (MSG_NOTE, vect_location,
3994 "reduction: unknown pattern\n");
3995
3996 return NULLnullptr;
3997}
3998
3999/* Estimate the number of peeled epilogue iterations for LOOP_VINFO.
4000 PEEL_ITERS_PROLOGUE is the number of peeled prologue iterations,
4001 or -1 if not known. */
4002
4003static int
4004vect_get_peel_iters_epilogue (loop_vec_info loop_vinfo, int peel_iters_prologue)
4005{
4006 int assumed_vf = vect_vf_for_cost (loop_vinfo);
4007 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)(tree_fits_shwi_p ((loop_vinfo)->num_iters) && tree_to_shwi
((loop_vinfo)->num_iters) > 0)
|| peel_iters_prologue == -1)
4008 {
4009 if (dump_enabled_p ())
4010 dump_printf_loc (MSG_NOTE, vect_location,
4011 "cost model: epilogue peel iters set to vf/2 "
4012 "because loop iterations are unknown .\n");
4013 return assumed_vf / 2;
4014 }
4015 else
4016 {
4017 int niters = LOOP_VINFO_INT_NITERS (loop_vinfo)(((unsigned long) (*tree_int_cst_elt_check (((loop_vinfo)->
num_iters), (0), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 4017, __FUNCTION__))))
;
4018 peel_iters_prologue = MIN (niters, peel_iters_prologue)((niters) < (peel_iters_prologue) ? (niters) : (peel_iters_prologue
))
;
4019 int peel_iters_epilogue = (niters - peel_iters_prologue) % assumed_vf;
4020 /* If we need to peel for gaps, but no peeling is required, we have to
4021 peel VF iterations. */
4022 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)(loop_vinfo)->peeling_for_gaps && !peel_iters_epilogue)
4023 peel_iters_epilogue = assumed_vf;
4024 return peel_iters_epilogue;
4025 }
4026}
4027
4028/* Calculate cost of peeling the loop PEEL_ITERS_PROLOGUE times. */
4029int
4030vect_get_known_peeling_cost (loop_vec_info loop_vinfo, int peel_iters_prologue,
4031 int *peel_iters_epilogue,
4032 stmt_vector_for_cost *scalar_cost_vec,
4033 stmt_vector_for_cost *prologue_cost_vec,
4034 stmt_vector_for_cost *epilogue_cost_vec)
4035{
4036 int retval = 0;
4037
4038 *peel_iters_epilogue
4039 = vect_get_peel_iters_epilogue (loop_vinfo, peel_iters_prologue);
4040
4041 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)(tree_fits_shwi_p ((loop_vinfo)->num_iters) && tree_to_shwi
((loop_vinfo)->num_iters) > 0)
)
4042 {
4043 /* If peeled iterations are known but number of scalar loop
4044 iterations are unknown, count a taken branch per peeled loop. */
4045 if (peel_iters_prologue > 0)
4046 retval = record_stmt_cost (prologue_cost_vec, 1, cond_branch_taken,
4047 vect_prologue);
4048 if (*peel_iters_epilogue > 0)
4049 retval += record_stmt_cost (epilogue_cost_vec, 1, cond_branch_taken,
4050 vect_epilogue);
4051 }
4052
4053 stmt_info_for_cost *si;
4054 int j;
4055 if (peel_iters_prologue)
4056 FOR_EACH_VEC_ELT (*scalar_cost_vec, j, si)for (j = 0; (*scalar_cost_vec).iterate ((j), &(si)); ++(j
))
4057 retval += record_stmt_cost (prologue_cost_vec,
4058 si->count * peel_iters_prologue,
4059 si->kind, si->stmt_info, si->misalign,
4060 vect_prologue);
4061 if (*peel_iters_epilogue)
4062 FOR_EACH_VEC_ELT (*scalar_cost_vec, j, si)for (j = 0; (*scalar_cost_vec).iterate ((j), &(si)); ++(j
))
4063 retval += record_stmt_cost (epilogue_cost_vec,
4064 si->count * *peel_iters_epilogue,
4065 si->kind, si->stmt_info, si->misalign,
4066 vect_epilogue);
4067
4068 return retval;
4069}
4070
4071/* Function vect_estimate_min_profitable_iters
4072
4073 Return the number of iterations required for the vector version of the
4074 loop to be profitable relative to the cost of the scalar version of the
4075 loop.
4076
4077 *RET_MIN_PROFITABLE_NITERS is a cost model profitability threshold
4078 of iterations for vectorization. -1 value means loop vectorization
4079 is not profitable. This returned value may be used for dynamic
4080 profitability check.
4081
4082 *RET_MIN_PROFITABLE_ESTIMATE is a profitability threshold to be used
4083 for static check against estimated number of iterations. */
4084
4085static void
4086vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
4087 int *ret_min_profitable_niters,
4088 int *ret_min_profitable_estimate,
4089 unsigned *suggested_unroll_factor)
4090{
4091 int min_profitable_iters;
4092 int min_profitable_estimate;
4093 int peel_iters_prologue;
4094 int peel_iters_epilogue;
4095 unsigned vec_inside_cost = 0;
4096 int vec_outside_cost = 0;
4097 unsigned vec_prologue_cost = 0;
4098 unsigned vec_epilogue_cost = 0;
4099 int scalar_single_iter_cost = 0;
4100 int scalar_outside_cost = 0;
4101 int assumed_vf = vect_vf_for_cost (loop_vinfo);
4102 int npeel = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)(loop_vinfo)->peeling_for_alignment;
4103 vector_costs *target_cost_data = loop_vinfo->vector_costs;
4104
4105 /* Cost model disabled. */
4106 if (unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo)(loop_vinfo)->loop))
4107 {
4108 if (dump_enabled_p ())
4109 dump_printf_loc (MSG_NOTE, vect_location, "cost model disabled.\n");
4110 *ret_min_profitable_niters = 0;
4111 *ret_min_profitable_estimate = 0;
4112 return;
4113 }
4114
4115 /* Requires loop versioning tests to handle misalignment. */
4116 if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)((loop_vinfo)->may_misalign_stmts.length () > 0))
4117 {
4118 /* FIXME: Make cost depend on complexity of individual check. */
4119 unsigned len = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)(loop_vinfo)->may_misalign_stmts.length ();
4120 (void) add_stmt_cost (target_cost_data, len, scalar_stmt, vect_prologue);
4121 if (dump_enabled_p ())
4122 dump_printf (MSG_NOTE,
4123 "cost model: Adding cost of checks for loop "
4124 "versioning to treat misalignment.\n");
4125 }
4126
4127 /* Requires loop versioning with alias checks. */
4128 if (LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo)((loop_vinfo)->comp_alias_ddrs.length () > 0 || (loop_vinfo
)->check_unequal_addrs.length () > 0 || (loop_vinfo)->
lower_bounds.length () > 0)
)
4129 {
4130 /* FIXME: Make cost depend on complexity of individual check. */
4131 unsigned len = LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo)(loop_vinfo)->comp_alias_ddrs.length ();
4132 (void) add_stmt_cost (target_cost_data, len, scalar_stmt, vect_prologue);
4133 len = LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo)(loop_vinfo)->check_unequal_addrs.length ();
4134 if (len)
4135 /* Count LEN - 1 ANDs and LEN comparisons. */
4136 (void) add_stmt_cost (target_cost_data, len * 2 - 1,
4137 scalar_stmt, vect_prologue);
4138 len = LOOP_VINFO_LOWER_BOUNDS (loop_vinfo)(loop_vinfo)->lower_bounds.length ();
4139 if (len)
4140 {
4141 /* Count LEN - 1 ANDs and LEN comparisons. */
4142 unsigned int nstmts = len * 2 - 1;
4143 /* +1 for each bias that needs adding. */
4144 for (unsigned int i = 0; i < len; ++i)
4145 if (!LOOP_VINFO_LOWER_BOUNDS (loop_vinfo)(loop_vinfo)->lower_bounds[i].unsigned_p)
4146 nstmts += 1;
4147 (void) add_stmt_cost (target_cost_data, nstmts,
4148 scalar_stmt, vect_prologue);
4149 }
4150 if (dump_enabled_p ())
4151 dump_printf (MSG_NOTE,
4152 "cost model: Adding cost of checks for loop "
4153 "versioning aliasing.\n");
4154 }
4155
4156 /* Requires loop versioning with niter checks. */
4157 if (LOOP_REQUIRES_VERSIONING_FOR_NITERS (loop_vinfo)((loop_vinfo)->num_iters_assumptions))
4158 {
4159 /* FIXME: Make cost depend on complexity of individual check. */
4160 (void) add_stmt_cost (target_cost_data, 1, vector_stmt,
4161 NULLnullptr, NULLnullptr, NULL_TREE(tree) nullptr, 0, vect_prologue);
4162 if (dump_enabled_p ())
4163 dump_printf (MSG_NOTE,
4164 "cost model: Adding cost of checks for loop "
4165 "versioning niters.\n");
4166 }
4167
4168 if (LOOP_REQUIRES_VERSIONING (loop_vinfo)(((loop_vinfo)->may_misalign_stmts.length () > 0) || ((
loop_vinfo)->comp_alias_ddrs.length () > 0 || (loop_vinfo
)->check_unequal_addrs.length () > 0 || (loop_vinfo)->
lower_bounds.length () > 0) || ((loop_vinfo)->num_iters_assumptions
) || ((loop_vinfo)->simd_if_cond))
)
4169 (void) add_stmt_cost (target_cost_data, 1, cond_branch_taken,
4170 vect_prologue);
4171
4172 /* Count statements in scalar loop. Using this as scalar cost for a single
4173 iteration for now.
4174
4175 TODO: Add outer loop support.
4176
4177 TODO: Consider assigning different costs to different scalar
4178 statements. */
4179
4180 scalar_single_iter_cost = loop_vinfo->scalar_costs->total_cost ();
4181
4182 /* Add additional cost for the peeled instructions in prologue and epilogue
4183 loop. (For fully-masked loops there will be no peeling.)
4184
4185 FORNOW: If we don't know the value of peel_iters for prologue or epilogue
4186 at compile-time - we assume it's vf/2 (the worst would be vf-1).
4187
4188 TODO: Build an expression that represents peel_iters for prologue and
4189 epilogue to be used in a run-time test. */
4190
4191 bool prologue_need_br_taken_cost = false;
4192 bool prologue_need_br_not_taken_cost = false;
4193
4194 /* Calculate peel_iters_prologue. */
4195 if (vect_use_loop_mask_for_alignment_p (loop_vinfo))
4196 peel_iters_prologue = 0;
4197 else if (npeel < 0)
4198 {
4199 peel_iters_prologue = assumed_vf / 2;
4200 if (dump_enabled_p ())
4201 dump_printf (MSG_NOTE, "cost model: "
4202 "prologue peel iters set to vf/2.\n");
4203
4204 /* If peeled iterations are unknown, count a taken branch and a not taken
4205 branch per peeled loop. Even if scalar loop iterations are known,
4206 vector iterations are not known since peeled prologue iterations are
4207 not known. Hence guards remain the same. */
4208 prologue_need_br_taken_cost = true;
4209 prologue_need_br_not_taken_cost = true;
4210 }
4211 else
4212 {
4213 peel_iters_prologue = npeel;
4214 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)(tree_fits_shwi_p ((loop_vinfo)->num_iters) && tree_to_shwi
((loop_vinfo)->num_iters) > 0)
&& peel_iters_prologue > 0)
4215 /* If peeled iterations are known but number of scalar loop
4216 iterations are unknown, count a taken branch per peeled loop. */
4217 prologue_need_br_taken_cost = true;
4218 }
4219
4220 bool epilogue_need_br_taken_cost = false;
4221 bool epilogue_need_br_not_taken_cost = false;
4222
4223 /* Calculate peel_iters_epilogue. */
4224 if (LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)(loop_vinfo)->using_partial_vectors_p)
4225 /* We need to peel exactly one iteration for gaps. */
4226 peel_iters_epilogue = LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)(loop_vinfo)->peeling_for_gaps ? 1 : 0;
4227 else if (npeel < 0)
4228 {
4229 /* If peeling for alignment is unknown, loop bound of main loop
4230 becomes unknown. */
4231 peel_iters_epilogue = assumed_vf / 2;
4232 if (dump_enabled_p ())
4233 dump_printf (MSG_NOTE, "cost model: "
4234 "epilogue peel iters set to vf/2 because "
4235 "peeling for alignment is unknown.\n");
4236
4237 /* See the same reason above in peel_iters_prologue calculation. */
4238 epilogue_need_br_taken_cost = true;
4239 epilogue_need_br_not_taken_cost = true;
4240 }
4241 else
4242 {
4243 peel_iters_epilogue = vect_get_peel_iters_epilogue (loop_vinfo, npeel);
4244 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)(tree_fits_shwi_p ((loop_vinfo)->num_iters) && tree_to_shwi
((loop_vinfo)->num_iters) > 0)
&& peel_iters_epilogue > 0)
4245 /* If peeled iterations are known but number of scalar loop
4246 iterations are unknown, count a taken branch per peeled loop. */
4247 epilogue_need_br_taken_cost = true;
4248 }
4249
4250 stmt_info_for_cost *si;
4251 int j;
4252 /* Add costs associated with peel_iters_prologue. */
4253 if (peel_iters_prologue)
4254 FOR_EACH_VEC_ELT (LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo), j, si)for (j = 0; ((loop_vinfo)->scalar_cost_vec).iterate ((j), &
(si)); ++(j))
4255 {
4256 (void) add_stmt_cost (target_cost_data,
4257 si->count * peel_iters_prologue, si->kind,
4258 si->stmt_info, si->node, si->vectype,
4259 si->misalign, vect_prologue);
4260 }
4261
4262 /* Add costs associated with peel_iters_epilogue. */
4263 if (peel_iters_epilogue)
4264 FOR_EACH_VEC_ELT (LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo), j, si)for (j = 0; ((loop_vinfo)->scalar_cost_vec).iterate ((j), &
(si)); ++(j))
4265 {
4266 (void) add_stmt_cost (target_cost_data,
4267 si->count * peel_iters_epilogue, si->kind,
4268 si->stmt_info, si->node, si->vectype,
4269 si->misalign, vect_epilogue);
4270 }
4271
4272 /* Add possible cond_branch_taken/cond_branch_not_taken cost. */
4273
4274 if (prologue_need_br_taken_cost)
4275 (void) add_stmt_cost (target_cost_data, 1, cond_branch_taken,
4276 vect_prologue);
4277
4278 if (prologue_need_br_not_taken_cost)
4279 (void) add_stmt_cost (target_cost_data, 1,
4280 cond_branch_not_taken, vect_prologue);
4281
4282 if (epilogue_need_br_taken_cost)
4283 (void) add_stmt_cost (target_cost_data, 1, cond_branch_taken,
4284 vect_epilogue);
4285
4286 if (epilogue_need_br_not_taken_cost)
4287 (void) add_stmt_cost (target_cost_data, 1,
4288 cond_branch_not_taken, vect_epilogue);
4289
4290 /* Take care of special costs for rgroup controls of partial vectors. */
4291 if (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)((loop_vinfo)->using_partial_vectors_p && !(loop_vinfo
)->masks.is_empty ())
)
4292 {
4293 /* Calculate how many masks we need to generate. */
4294 unsigned int num_masks = 0;
4295 rgroup_controls *rgm;
4296 unsigned int num_vectors_m1;
4297 FOR_EACH_VEC_ELT (LOOP_VINFO_MASKS (loop_vinfo), num_vectors_m1, rgm)for (num_vectors_m1 = 0; ((loop_vinfo)->masks).iterate ((num_vectors_m1
), &(rgm)); ++(num_vectors_m1))
4298 if (rgm->type)
4299 num_masks += num_vectors_m1 + 1;
4300 gcc_assert (num_masks > 0)((void)(!(num_masks > 0) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 4300, __FUNCTION__), 0 : 0))
;
4301
4302 /* In the worst case, we need to generate each mask in the prologue
4303 and in the loop body. One of the loop body mask instructions
4304 replaces the comparison in the scalar loop, and since we don't
4305 count the scalar comparison against the scalar body, we shouldn't
4306 count that vector instruction against the vector body either.
4307
4308 Sometimes we can use unpacks instead of generating prologue
4309 masks and sometimes the prologue mask will fold to a constant,
4310 so the actual prologue cost might be smaller. However, it's
4311 simpler and safer to use the worst-case cost; if this ends up
4312 being the tie-breaker between vectorizing or not, then it's
4313 probably better not to vectorize. */
4314 (void) add_stmt_cost (target_cost_data, num_masks,
4315 vector_stmt, NULLnullptr, NULLnullptr, NULL_TREE(tree) nullptr, 0,
4316 vect_prologue);
4317 (void) add_stmt_cost (target_cost_data, num_masks - 1,
4318 vector_stmt, NULLnullptr, NULLnullptr, NULL_TREE(tree) nullptr, 0,
4319 vect_body);
4320 }
4321 else if (LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)((loop_vinfo)->using_partial_vectors_p && !(loop_vinfo
)->lens.is_empty ())
)
4322 {
4323 /* Referring to the functions vect_set_loop_condition_partial_vectors
4324 and vect_set_loop_controls_directly, we need to generate each
4325 length in the prologue and in the loop body if required. Although
4326 there are some possible optimizations, we consider the worst case
4327 here. */
4328
4329 bool niters_known_p = LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)(tree_fits_shwi_p ((loop_vinfo)->num_iters) && tree_to_shwi
((loop_vinfo)->num_iters) > 0)
;
4330 signed char partial_load_store_bias
4331 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo)(loop_vinfo)->partial_load_store_bias;
4332 bool need_iterate_p
4333 = (!LOOP_VINFO_EPILOGUE_P (loop_vinfo)((loop_vinfo)->orig_loop_info != nullptr)
4334 && !vect_known_niters_smaller_than_vf (loop_vinfo));
4335
4336 /* Calculate how many statements to be added. */
4337 unsigned int prologue_stmts = 0;
4338 unsigned int body_stmts = 0;
4339
4340 rgroup_controls *rgc;
4341 unsigned int num_vectors_m1;
4342 FOR_EACH_VEC_ELT (LOOP_VINFO_LENS (loop_vinfo), num_vectors_m1, rgc)for (num_vectors_m1 = 0; ((loop_vinfo)->lens).iterate ((num_vectors_m1
), &(rgc)); ++(num_vectors_m1))
4343 if (rgc->type)
4344 {
4345 /* May need one SHIFT for nitems_total computation. */
4346 unsigned nitems = rgc->max_nscalars_per_iter * rgc->factor;
4347 if (nitems != 1 && !niters_known_p)
4348 prologue_stmts += 1;
4349
4350 /* May need one MAX and one MINUS for wrap around. */
4351 if (vect_rgroup_iv_might_wrap_p (loop_vinfo, rgc))
4352 prologue_stmts += 2;
4353
4354 /* Need one MAX and one MINUS for each batch limit excepting for
4355 the 1st one. */
4356 prologue_stmts += num_vectors_m1 * 2;
4357
4358 unsigned int num_vectors = num_vectors_m1 + 1;
4359
4360 /* Need to set up lengths in prologue, only one MIN required
4361 for each since start index is zero. */
4362 prologue_stmts += num_vectors;
4363
4364 /* If we have a non-zero partial load bias, we need one PLUS
4365 to adjust the load length. */
4366 if (partial_load_store_bias != 0)
4367 body_stmts += 1;
4368
4369 /* Each may need two MINs and one MINUS to update lengths in body
4370 for next iteration. */
4371 if (need_iterate_p)
4372 body_stmts += 3 * num_vectors;
4373 }
4374
4375 (void) add_stmt_cost (target_cost_data, prologue_stmts,
4376 scalar_stmt, vect_prologue);
4377 (void) add_stmt_cost (target_cost_data, body_stmts,
4378 scalar_stmt, vect_body);
4379 }
4380
4381 /* FORNOW: The scalar outside cost is incremented in one of the
4382 following ways:
4383
4384 1. The vectorizer checks for alignment and aliasing and generates
4385 a condition that allows dynamic vectorization. A cost model
4386 check is ANDED with the versioning condition. Hence scalar code
4387 path now has the added cost of the versioning check.
4388
4389 if (cost > th & versioning_check)
4390 jmp to vector code
4391
4392 Hence run-time scalar is incremented by not-taken branch cost.
4393
4394 2. The vectorizer then checks if a prologue is required. If the
4395 cost model check was not done before during versioning, it has to
4396 be done before the prologue check.
4397
4398 if (cost <= th)
4399 prologue = scalar_iters
4400 if (prologue == 0)
4401 jmp to vector code
4402 else
4403 execute prologue
4404 if (prologue == num_iters)
4405 go to exit
4406
4407 Hence the run-time scalar cost is incremented by a taken branch,
4408 plus a not-taken branch, plus a taken branch cost.
4409
4410 3. The vectorizer then checks if an epilogue is required. If the
4411 cost model check was not done before during prologue check, it
4412 has to be done with the epilogue check.
4413
4414 if (prologue == 0)
4415 jmp to vector code
4416 else
4417 execute prologue
4418 if (prologue == num_iters)
4419 go to exit
4420 vector code:
4421 if ((cost <= th) | (scalar_iters-prologue-epilogue == 0))
4422 jmp to epilogue
4423
4424 Hence the run-time scalar cost should be incremented by 2 taken
4425 branches.
4426
4427 TODO: The back end may reorder the BBS's differently and reverse
4428 conditions/branch directions. Change the estimates below to
4429 something more reasonable. */
4430
4431 /* If the number of iterations is known and we do not do versioning, we can
4432 decide whether to vectorize at compile time. Hence the scalar version
4433 do not carry cost model guard costs. */
4434 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)(tree_fits_shwi_p ((loop_vinfo)->num_iters) && tree_to_shwi
((loop_vinfo)->num_iters) > 0)
4435 || LOOP_REQUIRES_VERSIONING (loop_vinfo)(((loop_vinfo)->may_misalign_stmts.length () > 0) || ((
loop_vinfo)->comp_alias_ddrs.length () > 0 || (loop_vinfo
)->check_unequal_addrs.length () > 0 || (loop_vinfo)->
lower_bounds.length () > 0) || ((loop_vinfo)->num_iters_assumptions
) || ((loop_vinfo)->simd_if_cond))
)
4436 {
4437 /* Cost model check occurs at versioning. */
4438 if (LOOP_REQUIRES_VERSIONING (loop_vinfo)(((loop_vinfo)->may_misalign_stmts.length () > 0) || ((
loop_vinfo)->comp_alias_ddrs.length () > 0 || (loop_vinfo
)->check_unequal_addrs.length () > 0 || (loop_vinfo)->
lower_bounds.length () > 0) || ((loop_vinfo)->num_iters_assumptions
) || ((loop_vinfo)->simd_if_cond))
)
4439 scalar_outside_cost += vect_get_stmt_cost (cond_branch_not_taken);
4440 else
4441 {
4442 /* Cost model check occurs at prologue generation. */
4443 if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)(loop_vinfo)->peeling_for_alignment < 0)
4444 scalar_outside_cost += 2 * vect_get_stmt_cost (cond_branch_taken)
4445 + vect_get_stmt_cost (cond_branch_not_taken);
4446 /* Cost model check occurs at epilogue generation. */
4447 else
4448 scalar_outside_cost += 2 * vect_get_stmt_cost (cond_branch_taken);
4449 }
4450 }
4451
4452 /* Complete the target-specific cost calculations. */
4453 finish_cost (loop_vinfo->vector_costs, loop_vinfo->scalar_costs,
4454 &vec_prologue_cost, &vec_inside_cost, &vec_epilogue_cost,
4455 suggested_unroll_factor);
4456
4457 if (suggested_unroll_factor && *suggested_unroll_factor > 1
4458 && LOOP_VINFO_MAX_VECT_FACTOR (loop_vinfo)(loop_vinfo)->max_vectorization_factor != MAX_VECTORIZATION_FACTOR2147483647
4459 && !known_le (LOOP_VINFO_VECT_FACTOR (loop_vinfo) *(!maybe_lt ((loop_vinfo)->max_vectorization_factor, (loop_vinfo
)->vectorization_factor * *suggested_unroll_factor))
4460 *suggested_unroll_factor,(!maybe_lt ((loop_vinfo)->max_vectorization_factor, (loop_vinfo
)->vectorization_factor * *suggested_unroll_factor))
4461 LOOP_VINFO_MAX_VECT_FACTOR (loop_vinfo))(!maybe_lt ((loop_vinfo)->max_vectorization_factor, (loop_vinfo
)->vectorization_factor * *suggested_unroll_factor))
)
4462 {
4463 if (dump_enabled_p ())
4464 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4465 "can't unroll as unrolled vectorization factor larger"
4466 " than maximum vectorization factor: "
4467 HOST_WIDE_INT_PRINT_UNSIGNED"%" "l" "u" "\n",
4468 LOOP_VINFO_MAX_VECT_FACTOR (loop_vinfo)(loop_vinfo)->max_vectorization_factor);
4469 *suggested_unroll_factor = 1;
4470 }
4471
4472 vec_outside_cost = (int)(vec_prologue_cost + vec_epilogue_cost);
4473
4474 if (dump_enabled_p ())
4475 {
4476 dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
4477 dump_printf (MSG_NOTE, " Vector inside of loop cost: %d\n",
4478 vec_inside_cost);
4479 dump_printf (MSG_NOTE, " Vector prologue cost: %d\n",
4480 vec_prologue_cost);
4481 dump_printf (MSG_NOTE, " Vector epilogue cost: %d\n",
4482 vec_epilogue_cost);
4483 dump_printf (MSG_NOTE, " Scalar iteration cost: %d\n",
4484 scalar_single_iter_cost);
4485 dump_printf (MSG_NOTE, " Scalar outside cost: %d\n",
4486 scalar_outside_cost);
4487 dump_printf (MSG_NOTE, " Vector outside cost: %d\n",
4488 vec_outside_cost);
4489 dump_printf (MSG_NOTE, " prologue iterations: %d\n",
4490 peel_iters_prologue);
4491 dump_printf (MSG_NOTE, " epilogue iterations: %d\n",
4492 peel_iters_epilogue);
4493 }
4494
4495 /* Calculate number of iterations required to make the vector version
4496 profitable, relative to the loop bodies only. The following condition
4497 must hold true:
4498 SIC * niters + SOC > VIC * ((niters - NPEEL) / VF) + VOC
4499 where
4500 SIC = scalar iteration cost, VIC = vector iteration cost,
4501 VOC = vector outside cost, VF = vectorization factor,
4502 NPEEL = prologue iterations + epilogue iterations,
4503 SOC = scalar outside cost for run time cost model check. */
4504
4505 int saving_per_viter = (scalar_single_iter_cost * assumed_vf
4506 - vec_inside_cost);
4507 if (saving_per_viter <= 0)
4508 {
4509 if (LOOP_VINFO_LOOP (loop_vinfo)(loop_vinfo)->loop->force_vectorize)
4510 warning_at (vect_location.get_location_t (), OPT_Wopenmp_simd,
4511 "vectorization did not happen for a simd loop");
4512
4513 if (dump_enabled_p ())
4514 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4515 "cost model: the vector iteration cost = %d "
4516 "divided by the scalar iteration cost = %d "
4517 "is greater or equal to the vectorization factor = %d"
4518 ".\n",
4519 vec_inside_cost, scalar_single_iter_cost, assumed_vf);
4520 *ret_min_profitable_niters = -1;
4521 *ret_min_profitable_estimate = -1;
4522 return;
4523 }
4524
4525 /* ??? The "if" arm is written to handle all cases; see below for what
4526 we would do for !LOOP_VINFO_USING_PARTIAL_VECTORS_P. */
4527 if (LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)(loop_vinfo)->using_partial_vectors_p)
4528 {
4529 /* Rewriting the condition above in terms of the number of
4530 vector iterations (vniters) rather than the number of
4531 scalar iterations (niters) gives:
4532
4533 SIC * (vniters * VF + NPEEL) + SOC > VIC * vniters + VOC
4534
4535 <==> vniters * (SIC * VF - VIC) > VOC - SIC * NPEEL - SOC
4536
4537 For integer N, X and Y when X > 0:
4538
4539 N * X > Y <==> N >= (Y /[floor] X) + 1. */
4540 int outside_overhead = (vec_outside_cost
4541 - scalar_single_iter_cost * peel_iters_prologue
4542 - scalar_single_iter_cost * peel_iters_epilogue
4543 - scalar_outside_cost);
4544 /* We're only interested in cases that require at least one
4545 vector iteration. */
4546 int min_vec_niters = 1;
4547 if (outside_overhead > 0)
4548 min_vec_niters = outside_overhead / saving_per_viter + 1;
4549
4550 if (dump_enabled_p ())
4551 dump_printf (MSG_NOTE, " Minimum number of vector iterations: %d\n",
4552 min_vec_niters);
4553
4554 if (LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)(loop_vinfo)->using_partial_vectors_p)
4555 {
4556 /* Now that we know the minimum number of vector iterations,
4557 find the minimum niters for which the scalar cost is larger:
4558
4559 SIC * niters > VIC * vniters + VOC - SOC
4560
4561 We know that the minimum niters is no more than
4562 vniters * VF + NPEEL, but it might be (and often is) less
4563 than that if a partial vector iteration is cheaper than the
4564 equivalent scalar code. */
4565 int threshold = (vec_inside_cost * min_vec_niters
4566 + vec_outside_cost
4567 - scalar_outside_cost);
4568 if (threshold <= 0)
4569 min_profitable_iters = 1;
4570 else
4571 min_profitable_iters = threshold / scalar_single_iter_cost + 1;
4572 }
4573 else
4574 /* Convert the number of vector iterations into a number of
4575 scalar iterations. */
4576 min_profitable_iters = (min_vec_niters * assumed_vf
4577 + peel_iters_prologue
4578 + peel_iters_epilogue);
4579 }
4580 else
4581 {
4582 min_profitable_iters = ((vec_outside_cost - scalar_outside_cost)
4583 * assumed_vf
4584 - vec_inside_cost * peel_iters_prologue
4585 - vec_inside_cost * peel_iters_epilogue);
4586 if (min_profitable_iters <= 0)
4587 min_profitable_iters = 0;
4588 else
4589 {
4590 min_profitable_iters /= saving_per_viter;
4591
4592 if ((scalar_single_iter_cost * assumed_vf * min_profitable_iters)
4593 <= (((int) vec_inside_cost * min_profitable_iters)
4594 + (((int) vec_outside_cost - scalar_outside_cost)
4595 * assumed_vf)))
4596 min_profitable_iters++;
4597 }
4598 }
4599
4600 if (dump_enabled_p ())
4601 dump_printf (MSG_NOTE,
4602 " Calculated minimum iters for profitability: %d\n",
4603 min_profitable_iters);
4604
4605 if (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)(loop_vinfo)->using_partial_vectors_p
4606 && min_profitable_iters < (assumed_vf + peel_iters_prologue))
4607 /* We want the vectorized loop to execute at least once. */
4608 min_profitable_iters = assumed_vf + peel_iters_prologue;
4609 else if (min_profitable_iters < peel_iters_prologue)
4610 /* For LOOP_VINFO_USING_PARTIAL_VECTORS_P, we need to ensure the
4611 vectorized loop executes at least once. */
4612 min_profitable_iters = peel_iters_prologue;
4613
4614 if (dump_enabled_p ())
4615 dump_printf_loc (MSG_NOTE, vect_location,
4616 " Runtime profitability threshold = %d\n",
4617 min_profitable_iters);
4618
4619 *ret_min_profitable_niters = min_profitable_iters;
4620
4621 /* Calculate number of iterations required to make the vector version
4622 profitable, relative to the loop bodies only.
4623
4624 Non-vectorized variant is SIC * niters and it must win over vector
4625 variant on the expected loop trip count. The following condition must hold true:
4626 SIC * niters > VIC * ((niters - NPEEL) / VF) + VOC + SOC */
4627
4628 if (vec_outside_cost <= 0)
4629 min_profitable_estimate = 0;
4630 /* ??? This "else if" arm is written to handle all cases; see below for
4631 what we would do for !LOOP_VINFO_USING_PARTIAL_VECTORS_P. */
4632 else if (LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)(loop_vinfo)->using_partial_vectors_p)
4633 {
4634 /* This is a repeat of the code above, but with + SOC rather
4635 than - SOC. */
4636 int outside_overhead = (vec_outside_cost
4637 - scalar_single_iter_cost * peel_iters_prologue
4638 - scalar_single_iter_cost * peel_iters_epilogue
4639 + scalar_outside_cost);
4640 int min_vec_niters = 1;
4641 if (outside_overhead > 0)
4642 min_vec_niters = outside_overhead / saving_per_viter + 1;
4643
4644 if (LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)(loop_vinfo)->using_partial_vectors_p)
4645 {
4646 int threshold = (vec_inside_cost * min_vec_niters
4647 + vec_outside_cost
4648 + scalar_outside_cost);
4649 min_profitable_estimate = threshold / scalar_single_iter_cost + 1;
4650 }
4651 else
4652 min_profitable_estimate = (min_vec_niters * assumed_vf
4653 + peel_iters_prologue
4654 + peel_iters_epilogue);
4655 }
4656 else
4657 {
4658 min_profitable_estimate = ((vec_outside_cost + scalar_outside_cost)
4659 * assumed_vf
4660 - vec_inside_cost * peel_iters_prologue
4661 - vec_inside_cost * peel_iters_epilogue)
4662 / ((scalar_single_iter_cost * assumed_vf)
4663 - vec_inside_cost);
4664 }
4665 min_profitable_estimate = MAX (min_profitable_estimate, min_profitable_iters)((min_profitable_estimate) > (min_profitable_iters) ? (min_profitable_estimate
) : (min_profitable_iters))
;
4666 if (dump_enabled_p ())
4667 dump_printf_loc (MSG_NOTE, vect_location,
4668 " Static estimate profitability threshold = %d\n",
4669 min_profitable_estimate);
4670
4671 *ret_min_profitable_estimate = min_profitable_estimate;
4672}
4673
4674/* Writes into SEL a mask for a vec_perm, equivalent to a vec_shr by OFFSET
4675 vector elements (not bits) for a vector with NELT elements. */
4676static void
4677calc_vec_perm_mask_for_shift (unsigned int offset, unsigned int nelt,
4678 vec_perm_builder *sel)
4679{
4680 /* The encoding is a single stepped pattern. Any wrap-around is handled
4681 by vec_perm_indices. */
4682 sel->new_vector (nelt, 1, 3);
4683 for (unsigned int i = 0; i < 3; i++)
4684 sel->quick_push (i + offset);
4685}
4686
4687/* Checks whether the target supports whole-vector shifts for vectors of mode
4688 MODE. This is the case if _either_ the platform handles vec_shr_optab, _or_
4689 it supports vec_perm_const with masks for all necessary shift amounts. */
4690static bool
4691have_whole_vector_shift (machine_mode mode)
4692{
4693 if (optab_handler (vec_shr_optab, mode) != CODE_FOR_nothing)
4694 return true;
4695
4696 /* Variable-length vectors should be handled via the optab. */
4697 unsigned int nelt;
4698 if (!GET_MODE_NUNITS (mode).is_constant (&nelt))
4699 return false;
4700
4701 vec_perm_builder sel;
4702 vec_perm_indices indices;
4703 for (unsigned int i = nelt / 2; i >= 1; i /= 2)
4704 {
4705 calc_vec_perm_mask_for_shift (i, nelt, &sel);
4706 indices.new_vector (sel, 2, nelt);
4707 if (!can_vec_perm_const_p (mode, mode, indices, false))
4708 return false;
4709 }
4710 return true;
4711}
4712
4713/* Return true if (a) STMT_INFO is a DOT_PROD_EXPR reduction whose
4714 multiplication operands have differing signs and (b) we intend
4715 to emulate the operation using a series of signed DOT_PROD_EXPRs.
4716 See vect_emulate_mixed_dot_prod for the actual sequence used. */
4717
4718static bool
4719vect_is_emulated_mixed_dot_prod (loop_vec_info loop_vinfo,
4720 stmt_vec_info stmt_info)
4721{
4722 gassign *assign = dyn_cast<gassign *> (stmt_info->stmt);
4723 if (!assign || gimple_assign_rhs_code (assign) != DOT_PROD_EXPR)
4724 return false;
4725
4726 tree rhs1 = gimple_assign_rhs1 (assign);
4727 tree rhs2 = gimple_assign_rhs2 (assign);
4728 if (TYPE_SIGN (TREE_TYPE (rhs1))((signop) ((tree_class_check ((((contains_struct_check ((rhs1
), (TS_TYPED), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 4728, __FUNCTION__))->typed.type)), (tcc_type), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 4728, __FUNCTION__))->base.u.bits.unsigned_flag))
== TYPE_SIGN (TREE_TYPE (rhs2))((signop) ((tree_class_check ((((contains_struct_check ((rhs2
), (TS_TYPED), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 4728, __FUNCTION__))->typed.type)), (tcc_type), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 4728, __FUNCTION__))->base.u.bits.unsigned_flag))
)
4729 return false;
4730
4731 stmt_vec_info reduc_info = info_for_reduction (loop_vinfo, stmt_info);
4732 gcc_assert (reduc_info->is_reduc_info)((void)(!(reduc_info->is_reduc_info) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 4732, __FUNCTION__), 0 : 0))
;
4733 return !directly_supported_p (DOT_PROD_EXPR,
4734 STMT_VINFO_REDUC_VECTYPE_IN (reduc_info)(reduc_info)->reduc_vectype_in,
4735 optab_vector_mixed_sign);
4736}
4737
4738/* TODO: Close dependency between vect_model_*_cost and vectorizable_*
4739 functions. Design better to avoid maintenance issues. */
4740
4741/* Function vect_model_reduction_cost.
4742
4743 Models cost for a reduction operation, including the vector ops
4744 generated within the strip-mine loop in some cases, the initial
4745 definition before the loop, and the epilogue code that must be generated. */
4746
4747static void
4748vect_model_reduction_cost (loop_vec_info loop_vinfo,
4749 stmt_vec_info stmt_info, internal_fn reduc_fn,
4750 vect_reduction_type reduction_type,
4751 int ncopies, stmt_vector_for_cost *cost_vec)
4752{
4753 int prologue_cost = 0, epilogue_cost = 0, inside_cost = 0;
4754 tree vectype;
4755 machine_mode mode;
4756 class loop *loop = NULLnullptr;
4757
4758 if (loop_vinfo)
4759 loop = LOOP_VINFO_LOOP (loop_vinfo)(loop_vinfo)->loop;
4760
4761 /* Condition reductions generate two reductions in the loop. */
4762 if (reduction_type == COND_REDUCTION)
4763 ncopies *= 2;
4764
4765 vectype = STMT_VINFO_VECTYPE (stmt_info)(stmt_info)->vectype;
4766 mode = TYPE_MODE (vectype)((((enum tree_code) ((tree_class_check ((vectype), (tcc_type)
, "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 4766, __FUNCTION__)))->base.code) == VECTOR_TYPE) ? vector_type_mode
(vectype) : (vectype)->type_common.mode)
;
4767 stmt_vec_info orig_stmt_info = vect_orig_stmt (stmt_info);
4768
4769 gimple_match_op op;
4770 if (!gimple_extract_op (orig_stmt_info->stmt, &op))
4771 gcc_unreachable ()(fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 4771, __FUNCTION__))
;
4772
4773 bool emulated_mixed_dot_prod
4774 = vect_is_emulated_mixed_dot_prod (loop_vinfo, stmt_info);
4775 if (reduction_type == EXTRACT_LAST_REDUCTION)
4776 /* No extra instructions are needed in the prologue. The loop body
4777 operations are costed in vectorizable_condition. */
4778 inside_cost = 0;
4779 else if (reduction_type == FOLD_LEFT_REDUCTION)
4780 {
4781 /* No extra instructions needed in the prologue. */
4782 prologue_cost = 0;
4783
4784 if (reduc_fn != IFN_LAST)
4785 /* Count one reduction-like operation per vector. */
4786 inside_cost = record_stmt_cost (cost_vec, ncopies, vec_to_scalar,
4787 stmt_info, 0, vect_body);
4788 else
4789 {
4790 /* Use NELEMENTS extracts and NELEMENTS scalar ops. */
4791 unsigned int nelements = ncopies * vect_nunits_for_cost (vectype);
4792 inside_cost = record_stmt_cost (cost_vec, nelements,
4793 vec_to_scalar, stmt_info, 0,
4794 vect_body);
4795 inside_cost += record_stmt_cost (cost_vec, nelements,
4796 scalar_stmt, stmt_info, 0,
4797 vect_body);
4798 }
4799 }
4800 else
4801 {
4802 /* Add in the cost of the initial definitions. */
4803 int prologue_stmts;
4804 if (reduction_type == COND_REDUCTION)
4805 /* For cond reductions we have four vectors: initial index, step,
4806 initial result of the data reduction, initial value of the index
4807 reduction. */
4808 prologue_stmts = 4;
4809 else if (emulated_mixed_dot_prod)
4810 /* We need the initial reduction value and two invariants:
4811 one that contains the minimum signed value and one that
4812 contains half of its negative. */
4813 prologue_stmts = 3;
4814 else
4815 prologue_stmts = 1;
4816 prologue_cost += record_stmt_cost (cost_vec, prologue_stmts,
4817 scalar_to_vec, stmt_info, 0,
4818 vect_prologue);
4819 }
4820
4821 /* Determine cost of epilogue code.
4822
4823 We have a reduction operator that will reduce the vector in one statement.
4824 Also requires scalar extract. */
4825
4826 if (!loop || !nested_in_vect_loop_p (loop, orig_stmt_info))
4827 {
4828 if (reduc_fn != IFN_LAST)
4829 {
4830 if (reduction_type == COND_REDUCTION)
4831 {
4832 /* An EQ stmt and an COND_EXPR stmt. */
4833 epilogue_cost += record_stmt_cost (cost_vec, 2,
4834 vector_stmt, stmt_info, 0,
4835 vect_epilogue);
4836 /* Reduction of the max index and a reduction of the found
4837 values. */
4838 epilogue_cost += record_stmt_cost (cost_vec, 2,
4839 vec_to_scalar, stmt_info, 0,
4840 vect_epilogue);
4841 /* A broadcast of the max value. */
4842 epilogue_cost += record_stmt_cost (cost_vec, 1,
4843 scalar_to_vec, stmt_info, 0,
4844 vect_epilogue);
4845 }
4846 else
4847 {
4848 epilogue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
4849 stmt_info, 0, vect_epilogue);
4850 epilogue_cost += record_stmt_cost (cost_vec, 1,
4851 vec_to_scalar, stmt_info, 0,
4852 vect_epilogue);
4853 }
4854 }
4855 else if (reduction_type == COND_REDUCTION)
4856 {
4857 unsigned estimated_nunits = vect_nunits_for_cost (vectype);
4858 /* Extraction of scalar elements. */
4859 epilogue_cost += record_stmt_cost (cost_vec,
4860 2 * estimated_nunits,
4861 vec_to_scalar, stmt_info, 0,
4862 vect_epilogue);
4863 /* Scalar max reductions via COND_EXPR / MAX_EXPR. */
4864 epilogue_cost += record_stmt_cost (cost_vec,
4865 2 * estimated_nunits - 3,
4866 scalar_stmt, stmt_info, 0,
4867 vect_epilogue);
4868 }
4869 else if (reduction_type == EXTRACT_LAST_REDUCTION
4870 || reduction_type == FOLD_LEFT_REDUCTION)
4871 /* No extra instructions need in the epilogue. */
4872 ;
4873 else
4874 {
4875 int vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype)((tree_class_check ((vectype), (tcc_type), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 4875, __FUNCTION__))->type_common.size)
);
4876 tree bitsize = TYPE_SIZE (op.type)((tree_class_check ((op.type), (tcc_type), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 4876, __FUNCTION__))->type_common.size)
;
4877 int element_bitsize = tree_to_uhwi (bitsize);
4878 int nelements = vec_size_in_bits / element_bitsize;
4879
4880 if (op.code == COND_EXPR)
4881 op.code = MAX_EXPR;
4882
4883 /* We have a whole vector shift available. */
4884 if (VECTOR_MODE_P (mode)(((enum mode_class) mode_class[mode]) == MODE_VECTOR_BOOL || (
(enum mode_class) mode_class[mode]) == MODE_VECTOR_INT || ((enum
mode_class) mode_class[mode]) == MODE_VECTOR_FLOAT || ((enum
mode_class) mode_class[mode]) == MODE_VECTOR_FRACT || ((enum
mode_class) mode_class[mode]) == MODE_VECTOR_UFRACT || ((enum
mode_class) mode_class[mode]) == MODE_VECTOR_ACCUM || ((enum
mode_class) mode_class[mode]) == MODE_VECTOR_UACCUM)
4885 && directly_supported_p (op.code, vectype)
4886 && have_whole_vector_shift (mode))
4887 {
4888 /* Final reduction via vector shifts and the reduction operator.
4889 Also requires scalar extract. */
4890 epilogue_cost += record_stmt_cost (cost_vec,
4891 exact_log2 (nelements) * 2,
4892 vector_stmt, stmt_info, 0,
4893 vect_epilogue);
4894 epilogue_cost += record_stmt_cost (cost_vec, 1,
4895 vec_to_scalar, stmt_info, 0,
4896 vect_epilogue);
4897 }
4898 else
4899 /* Use extracts and reduction op for final reduction. For N
4900 elements, we have N extracts and N-1 reduction ops. */
4901 epilogue_cost += record_stmt_cost (cost_vec,
4902 nelements + nelements - 1,
4903 vector_stmt, stmt_info, 0,
4904 vect_epilogue);
4905 }
4906 }
4907
4908 if (dump_enabled_p ())
4909 dump_printf (MSG_NOTE,
4910 "vect_model_reduction_cost: inside_cost = %d, "
4911 "prologue_cost = %d, epilogue_cost = %d .\n", inside_cost,
4912 prologue_cost, epilogue_cost);
4913}
4914
4915/* SEQ is a sequence of instructions that initialize the reduction
4916 described by REDUC_INFO. Emit them in the appropriate place. */
4917
4918static void
4919vect_emit_reduction_init_stmts (loop_vec_info loop_vinfo,
4920 stmt_vec_info reduc_info, gimple *seq)
4921{
4922 if (reduc_info->reused_accumulator)
4923 {
4924 /* When reusing an accumulator from the main loop, we only need
4925 initialization instructions if the main loop can be skipped.
4926 In that case, emit the initialization instructions at the end
4927 of the guard block that does the skip. */
4928 edge skip_edge = loop_vinfo->skip_main_loop_edge;
4929 gcc_assert (skip_edge)((void)(!(skip_edge) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 4929, __FUNCTION__), 0 : 0))
;
4930 gimple_stmt_iterator gsi = gsi_last_bb (skip_edge->src);
4931 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
4932 }
4933 else
4934 {
4935 /* The normal case: emit the initialization instructions on the
4936 preheader edge. */
4937 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo)(loop_vinfo)->loop;
4938 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), seq);
4939 }
4940}
4941
4942/* Function get_initial_def_for_reduction
4943
4944 Input:
4945 REDUC_INFO - the info_for_reduction
4946 INIT_VAL - the initial value of the reduction variable
4947 NEUTRAL_OP - a value that has no effect on the reduction, as per
4948 neutral_op_for_reduction
4949
4950 Output:
4951 Return a vector variable, initialized according to the operation that
4952 STMT_VINFO performs. This vector will be used as the initial value
4953 of the vector of partial results.
4954
4955 The value we need is a vector in which element 0 has value INIT_VAL
4956 and every other element has value NEUTRAL_OP. */
4957
4958static tree
4959get_initial_def_for_reduction (loop_vec_info loop_vinfo,
4960 stmt_vec_info reduc_info,
4961 tree init_val, tree neutral_op)
4962{
4963 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo)(loop_vinfo)->loop;
4964 tree scalar_type = TREE_TYPE (init_val)((contains_struct_check ((init_val), (TS_TYPED), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 4964, __FUNCTION__))->typed.type)
;
4965 tree vectype = get_vectype_for_scalar_type (loop_vinfo, scalar_type);
4966 tree init_def;
4967 gimple_seq stmts = NULLnullptr;
4968
4969 gcc_assert (vectype)((void)(!(vectype) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 4969, __FUNCTION__), 0 : 0))
;
4970
4971 gcc_assert (POINTER_TYPE_P (scalar_type) || INTEGRAL_TYPE_P (scalar_type)((void)(!((((enum tree_code) (scalar_type)->base.code) == POINTER_TYPE
|| ((enum tree_code) (scalar_type)->base.code) == REFERENCE_TYPE
) || (((enum tree_code) (scalar_type)->base.code) == ENUMERAL_TYPE
|| ((enum tree_code) (scalar_type)->base.code) == BOOLEAN_TYPE
|| ((enum tree_code) (scalar_type)->base.code) == INTEGER_TYPE
) || (((enum tree_code) (scalar_type)->base.code) == REAL_TYPE
)) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 4972, __FUNCTION__), 0 : 0))
4972 || SCALAR_FLOAT_TYPE_P (scalar_type))((void)(!((((enum tree_code) (scalar_type)->base.code) == POINTER_TYPE
|| ((enum tree_code) (scalar_type)->base.code) == REFERENCE_TYPE
) || (((enum tree_code) (scalar_type)->base.code) == ENUMERAL_TYPE
|| ((enum tree_code) (scalar_type)->base.code) == BOOLEAN_TYPE
|| ((enum tree_code) (scalar_type)->base.code) == INTEGER_TYPE
) || (((enum tree_code) (scalar_type)->base.code) == REAL_TYPE
)) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 4972, __FUNCTION__), 0 : 0))
;
4973
4974 gcc_assert (nested_in_vect_loop_p (loop, reduc_info)((void)(!(nested_in_vect_loop_p (loop, reduc_info) || loop ==
(gimple_bb (reduc_info->stmt))->loop_father) ? fancy_abort
("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 4975, __FUNCTION__), 0 : 0))
4975 || loop == (gimple_bb (reduc_info->stmt))->loop_father)((void)(!(nested_in_vect_loop_p (loop, reduc_info) || loop ==
(gimple_bb (reduc_info->stmt))->loop_father) ? fancy_abort
("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 4975, __FUNCTION__), 0 : 0))
;
4976
4977 if (operand_equal_p (init_val, neutral_op))
4978 {
4979 /* If both elements are equal then the vector described above is
4980 just a splat. */
4981 neutral_op = gimple_convert (&stmts, TREE_TYPE (vectype)((contains_struct_check ((vectype), (TS_TYPED), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 4981, __FUNCTION__))->typed.type)
, neutral_op);
4982 init_def = gimple_build_vector_from_val (&stmts, vectype, neutral_op);
4983 }
4984 else
4985 {
4986 neutral_op = gimple_convert (&stmts, TREE_TYPE (vectype)((contains_struct_check ((vectype), (TS_TYPED), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 4986, __FUNCTION__))->typed.type)
, neutral_op);
4987 init_val = gimple_convert (&stmts, TREE_TYPE (vectype)((contains_struct_check ((vectype), (TS_TYPED), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 4987, __FUNCTION__))->typed.type)
, init_val);
4988 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant ())
4989 {
4990 /* Construct a splat of NEUTRAL_OP and insert INIT_VAL into
4991 element 0. */
4992 init_def = gimple_build_vector_from_val (&stmts, vectype,
4993 neutral_op);
4994 init_def = gimple_build (&stmts, CFN_VEC_SHL_INSERT,
4995 vectype, init_def, init_val);
4996 }
4997 else
4998 {
4999 /* Build {INIT_VAL, NEUTRAL_OP, NEUTRAL_OP, ...}. */
5000 tree_vector_builder elts (vectype, 1, 2);
5001 elts.quick_push (init_val);
5002 elts.quick_push (neutral_op);
5003 init_def = gimple_build_vector (&stmts, &elts);
5004 }
5005 }
5006
5007 if (stmts)
5008 vect_emit_reduction_init_stmts (loop_vinfo, reduc_info, stmts);
5009 return init_def;
5010}
5011
5012/* Get at the initial defs for the reduction PHIs for REDUC_INFO,
5013 which performs a reduction involving GROUP_SIZE scalar statements.
5014 NUMBER_OF_VECTORS is the number of vector defs to create. If NEUTRAL_OP
5015 is nonnull, introducing extra elements of that value will not change the
5016 result. */
5017
5018static void
5019get_initial_defs_for_reduction (loop_vec_info loop_vinfo,
5020 stmt_vec_info reduc_info,
5021 vec<tree> *vec_oprnds,
5022 unsigned int number_of_vectors,
5023 unsigned int group_size, tree neutral_op)
5024{
5025 vec<tree> &initial_values = reduc_info->reduc_initial_values;
5026 unsigned HOST_WIDE_INTlong nunits;
5027 unsigned j, number_of_places_left_in_vector;
5028 tree vector_type = STMT_VINFO_VECTYPE (reduc_info)(reduc_info)->vectype;
5029 unsigned int i;
5030
5031 gcc_assert (group_size == initial_values.length () || neutral_op)((void)(!(group_size == initial_values.length () || neutral_op
) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 5031, __FUNCTION__), 0 : 0))
;
5032
5033 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
5034 created vectors. It is greater than 1 if unrolling is performed.
5035
5036 For example, we have two scalar operands, s1 and s2 (e.g., group of
5037 strided accesses of size two), while NUNITS is four (i.e., four scalars
5038 of this type can be packed in a vector). The output vector will contain
5039 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
5040 will be 2).
5041
5042 If REDUC_GROUP_SIZE > NUNITS, the scalars will be split into several
5043 vectors containing the operands.
5044
5045 For example, NUNITS is four as before, and the group size is 8
5046 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
5047 {s5, s6, s7, s8}. */
5048
5049 if (!TYPE_VECTOR_SUBPARTS (vector_type).is_constant (&nunits))
5050 nunits = group_size;
5051
5052 number_of_places_left_in_vector = nunits;
5053 bool constant_p = true;
5054 tree_vector_builder elts (vector_type, nunits, 1);
5055 elts.quick_grow (nunits);
5056 gimple_seq ctor_seq = NULLnullptr;
5057 for (j = 0; j < nunits * number_of_vectors; ++j)
5058 {
5059 tree op;
5060 i = j % group_size;
5061
5062 /* Get the def before the loop. In reduction chain we have only
5063 one initial value. Else we have as many as PHIs in the group. */
5064 if (i >= initial_values.length () || (j > i && neutral_op))
5065 op = neutral_op;
5066 else
5067 op = initial_values[i];
5068
5069 /* Create 'vect_ = {op0,op1,...,opn}'. */
5070 number_of_places_left_in_vector--;
5071 elts[nunits - number_of_places_left_in_vector - 1] = op;
5072 if (!CONSTANT_CLASS_P (op)(tree_code_type_tmpl <0>::tree_code_type[(int) (((enum tree_code
) (op)->base.code))] == tcc_constant)
)
5073 constant_p = false;
5074
5075 if (number_of_places_left_in_vector == 0)
5076 {
5077 tree init;
5078 if (constant_p && !neutral_op
5079 ? multiple_p (TYPE_VECTOR_SUBPARTS (vector_type), nunits)
5080 : known_eq (TYPE_VECTOR_SUBPARTS (vector_type), nunits)(!maybe_ne (TYPE_VECTOR_SUBPARTS (vector_type), nunits)))
5081 /* Build the vector directly from ELTS. */
5082 init = gimple_build_vector (&ctor_seq, &elts);
5083 else if (neutral_op)
5084 {
5085 /* Build a vector of the neutral value and shift the
5086 other elements into place. */
5087 init = gimple_build_vector_from_val (&ctor_seq, vector_type,
5088 neutral_op);
5089 int k = nunits;
5090 while (k > 0 && elts[k - 1] == neutral_op)
5091 k -= 1;
5092 while (k > 0)
5093 {
5094 k -= 1;
5095 init = gimple_build (&ctor_seq, CFN_VEC_SHL_INSERT,
5096 vector_type, init, elts[k]);
5097 }
5098 }
5099 else
5100 {
5101 /* First time round, duplicate ELTS to fill the
5102 required number of vectors. */
5103 duplicate_and_interleave (loop_vinfo, &ctor_seq, vector_type,
5104 elts, number_of_vectors, *vec_oprnds);
5105 break;
5106 }
5107 vec_oprnds->quick_push (init);
5108
5109 number_of_places_left_in_vector = nunits;
5110 elts.new_vector (vector_type, nunits, 1);
5111 elts.quick_grow (nunits);
5112 constant_p = true;
5113 }
5114 }
5115 if (ctor_seq != NULLnullptr)
5116 vect_emit_reduction_init_stmts (loop_vinfo, reduc_info, ctor_seq);
5117}
5118
5119/* For a statement STMT_INFO taking part in a reduction operation return
5120 the stmt_vec_info the meta information is stored on. */
5121
5122stmt_vec_info
5123info_for_reduction (vec_info *vinfo, stmt_vec_info stmt_info)
5124{
5125 stmt_info = vect_orig_stmt (stmt_info);
5126 gcc_assert (STMT_VINFO_REDUC_DEF (stmt_info))((void)(!((stmt_info)->reduc_def) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 5126, __FUNCTION__), 0 : 0))
;
5127 if (!is_a <gphi *> (stmt_info->stmt)
5128 || !VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info))((((stmt_info)->def_type) == vect_reduction_def) || (((stmt_info
)->def_type) == vect_double_reduction_def) || (((stmt_info
)->def_type) == vect_nested_cycle))
)
5129 stmt_info = STMT_VINFO_REDUC_DEF (stmt_info)(stmt_info)->reduc_def;
5130 gphi *phi = as_a <gphi *> (stmt_info->stmt);
5131 if (STMT_VINFO_DEF_TYPE (stmt_info)(stmt_info)->def_type == vect_double_reduction_def)
5132 {
5133 if (gimple_phi_num_args (phi) == 1)
5134 stmt_info = STMT_VINFO_REDUC_DEF (stmt_info)(stmt_info)->reduc_def;
5135 }
5136 else if (STMT_VINFO_DEF_TYPE (stmt_info)(stmt_info)->def_type == vect_nested_cycle)
5137 {
5138 stmt_vec_info info = vinfo->lookup_def (vect_phi_initial_value (phi));
5139 if (info && STMT_VINFO_DEF_TYPE (info)(info)->def_type == vect_double_reduction_def)
5140 stmt_info = info;
5141 }
5142 return stmt_info;
5143}
5144
5145/* See if LOOP_VINFO is an epilogue loop whose main loop had a reduction that
5146 REDUC_INFO can build on. Adjust REDUC_INFO and return true if so, otherwise
5147 return false. */
5148
5149static bool
5150vect_find_reusable_accumulator (loop_vec_info loop_vinfo,
5151 stmt_vec_info reduc_info)
5152{
5153 loop_vec_info main_loop_vinfo = LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo)(loop_vinfo)->orig_loop_info;
5154 if (!main_loop_vinfo)
5155 return false;
5156
5157 if (STMT_VINFO_REDUC_TYPE (reduc_info)(reduc_info)->reduc_type != TREE_CODE_REDUCTION)
5158 return false;
5159
5160 unsigned int num_phis = reduc_info->reduc_initial_values.length ();
5161 auto_vec<tree, 16> main_loop_results (num_phis);
5162 auto_vec<tree, 16> initial_values (num_phis);
5163 if (edge main_loop_edge = loop_vinfo->main_loop_edge)
5164 {
5165 /* The epilogue loop can be entered either from the main loop or
5166 from an earlier guard block. */
5167 edge skip_edge = loop_vinfo->skip_main_loop_edge;
5168 for (tree incoming_value : reduc_info->reduc_initial_values)
5169 {
5170 /* Look for:
5171
5172 INCOMING_VALUE = phi<MAIN_LOOP_RESULT(main loop),
5173 INITIAL_VALUE(guard block)>. */
5174 gcc_assert (TREE_CODE (incoming_value) == SSA_NAME)((void)(!(((enum tree_code) (incoming_value)->base.code) ==
SSA_NAME) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 5174, __FUNCTION__), 0 : 0))
;
5175
5176 gphi *phi = as_a <gphi *> (SSA_NAME_DEF_STMT (incoming_value)(tree_check ((incoming_value), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 5176, __FUNCTION__, (SSA_NAME)))->ssa_name.def_stmt
);
5177 gcc_assert (gimple_bb (phi) == main_loop_edge->dest)((void)(!(gimple_bb (phi) == main_loop_edge->dest) ? fancy_abort
("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 5177, __FUNCTION__), 0 : 0))
;
5178
5179 tree from_main_loop = PHI_ARG_DEF_FROM_EDGE (phi, main_loop_edge)gimple_phi_arg_def (((phi)), ((main_loop_edge)->dest_idx));
5180 tree from_skip = PHI_ARG_DEF_FROM_EDGE (phi, skip_edge)gimple_phi_arg_def (((phi)), ((skip_edge)->dest_idx));
5181
5182 main_loop_results.quick_push (from_main_loop);
5183 initial_values.quick_push (from_skip);
5184 }
5185 }
5186 else
5187 /* The main loop dominates the epilogue loop. */
5188 main_loop_results.splice (reduc_info->reduc_initial_values);
5189
5190 /* See if the main loop has the kind of accumulator we need. */
5191 vect_reusable_accumulator *accumulator
5192 = main_loop_vinfo->reusable_accumulators.get (main_loop_results[0]);
5193 if (!accumulator
5194 || num_phis != accumulator->reduc_info->reduc_scalar_results.length ()
5195 || !std::equal (main_loop_results.begin (), main_loop_results.end (),
5196 accumulator->reduc_info->reduc_scalar_results.begin ()))
5197 return false;
5198
5199 /* Handle the case where we can reduce wider vectors to narrower ones. */
5200 tree vectype = STMT_VINFO_VECTYPE (reduc_info)(reduc_info)->vectype;
5201 tree old_vectype = TREE_TYPE (accumulator->reduc_input)((contains_struct_check ((accumulator->reduc_input), (TS_TYPED
), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 5201, __FUNCTION__))->typed.type)
;
5202 unsigned HOST_WIDE_INTlong m;
5203 if (!constant_multiple_p (TYPE_VECTOR_SUBPARTS (old_vectype),
5204 TYPE_VECTOR_SUBPARTS (vectype), &m))
5205 return false;
5206 /* Check the intermediate vector types and operations are available. */
5207 tree prev_vectype = old_vectype;
5208 poly_uint64 intermediate_nunits = TYPE_VECTOR_SUBPARTS (old_vectype);
5209 while (known_gt (intermediate_nunits, TYPE_VECTOR_SUBPARTS (vectype))(!maybe_le (intermediate_nunits, TYPE_VECTOR_SUBPARTS (vectype
)))
)
5210 {
5211 intermediate_nunits = exact_div (intermediate_nunits, 2);
5212 tree intermediate_vectype = get_related_vectype_for_scalar_type
5213 (TYPE_MODE (vectype)((((enum tree_code) ((tree_class_check ((vectype), (tcc_type)
, "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 5213, __FUNCTION__)))->base.code) == VECTOR_TYPE) ? vector_type_mode
(vectype) : (vectype)->type_common.mode)
, TREE_TYPE (vectype)((contains_struct_check ((vectype), (TS_TYPED), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 5213, __FUNCTION__))->typed.type)
, intermediate_nunits);
5214 if (!intermediate_vectype
5215 || !directly_supported_p (STMT_VINFO_REDUC_CODE (reduc_info)(reduc_info)->reduc_code,
5216 intermediate_vectype)
5217 || !can_vec_extract (TYPE_MODE (prev_vectype)((((enum tree_code) ((tree_class_check ((prev_vectype), (tcc_type
), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 5217, __FUNCTION__)))->base.code) == VECTOR_TYPE) ? vector_type_mode
(prev_vectype) : (prev_vectype)->type_common.mode)
,
5218 TYPE_MODE (intermediate_vectype)((((enum tree_code) ((tree_class_check ((intermediate_vectype
), (tcc_type), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 5218, __FUNCTION__)))->base.code) == VECTOR_TYPE) ? vector_type_mode
(intermediate_vectype) : (intermediate_vectype)->type_common
.mode)
))
5219 return false;
5220 prev_vectype = intermediate_vectype;
5221 }
5222
5223 /* Non-SLP reductions might apply an adjustment after the reduction
5224 operation, in order to simplify the initialization of the accumulator.
5225 If the epilogue loop carries on from where the main loop left off,
5226 it should apply the same adjustment to the final reduction result.
5227
5228 If the epilogue loop can also be entered directly (rather than via
5229 the main loop), we need to be able to handle that case in the same way,
5230 with the same adjustment. (In principle we could add a PHI node
5231 to select the correct adjustment, but in practice that shouldn't be
5232 necessary.) */
5233 tree main_adjustment
5234 = STMT_VINFO_REDUC_EPILOGUE_ADJUSTMENT (accumulator->reduc_info)(accumulator->reduc_info)->reduc_epilogue_adjustment;
5235 if (loop_vinfo->main_loop_edge && main_adjustment)
5236 {
5237 gcc_assert (num_phis == 1)((void)(!(num_phis == 1) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 5237, __FUNCTION__), 0 : 0))
;
5238 tree initial_value = initial_values[0];
5239 /* Check that we can use INITIAL_VALUE as the adjustment and
5240 initialize the accumulator with a neutral value instead. */
5241 if (!operand_equal_p (initial_value, main_adjustment))
5242 return false;
5243 code_helper code = STMT_VINFO_REDUC_CODE (reduc_info)(reduc_info)->reduc_code;
5244 initial_values[0] = neutral_op_for_reduction (TREE_TYPE (initial_value)((contains_struct_check ((initial_value), (TS_TYPED), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 5244, __FUNCTION__))->typed.type)
,
5245 code, initial_value);
5246 }
5247 STMT_VINFO_REDUC_EPILOGUE_ADJUSTMENT (reduc_info)(reduc_info)->reduc_epilogue_adjustment = main_adjustment;
5248 reduc_info->reduc_initial_values.truncate (0);
5249 reduc_info->reduc_initial_values.splice (initial_values);
5250 reduc_info->reused_accumulator = accumulator;
5251 return true;
5252}
5253
5254/* Reduce the vector VEC_DEF down to VECTYPE with reduction operation
5255 CODE emitting stmts before GSI. Returns a vector def of VECTYPE. */
5256
5257static tree
5258vect_create_partial_epilog (tree vec_def, tree vectype, code_helper code,
5259 gimple_seq *seq)
5260{
5261 unsigned nunits = TYPE_VECTOR_SUBPARTS (TREE_TYPE (vec_def)((contains_struct_check ((vec_def), (TS_TYPED), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 5261, __FUNCTION__))->typed.type)
).to_constant ();
5262 unsigned nunits1 = TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
5263 tree stype = TREE_TYPE (vectype)((contains_struct_check ((vectype), (TS_TYPED), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 5263, __FUNCTION__))->typed.type)
;
5264 tree new_temp = vec_def;
5265 while (nunits > nunits1)
5266 {
5267 nunits /= 2;
5268 tree vectype1 = get_related_vectype_for_scalar_type (TYPE_MODE (vectype)((((enum tree_code) ((tree_class_check ((vectype), (tcc_type)
, "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 5268, __FUNCTION__)))->base.code) == VECTOR_TYPE) ? vector_type_mode
(vectype) : (vectype)->type_common.mode)
,
5269 stype, nunits);
5270 unsigned int bitsize = tree_to_uhwi (TYPE_SIZE (vectype1)((tree_class_check ((vectype1), (tcc_type), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 5270, __FUNCTION__))->type_common.size)
);
5271
5272 /* The target has to make sure we support lowpart/highpart
5273 extraction, either via direct vector extract or through
5274 an integer mode punning. */
5275 tree dst1, dst2;
5276 gimple *epilog_stmt;
5277 if (convert_optab_handler (vec_extract_optab,
5278 TYPE_MODE (TREE_TYPE (new_temp))((((enum tree_code) ((tree_class_check ((((contains_struct_check
((new_temp), (TS_TYPED), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 5278, __FUNCTION__))->typed.type)), (tcc_type), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 5278, __FUNCTION__)))->base.code) == VECTOR_TYPE) ? vector_type_mode
(((contains_struct_check ((new_temp), (TS_TYPED), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 5278, __FUNCTION__))->typed.type)) : (((contains_struct_check
((new_temp), (TS_TYPED), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 5278, __FUNCTION__))->typed.type))->type_common.mode)
,
5279 TYPE_MODE (vectype1)((((enum tree_code) ((tree_class_check ((vectype1), (tcc_type
), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 5279, __FUNCTION__)))->base.code) == VECTOR_TYPE) ? vector_type_mode
(vectype1) : (vectype1)->type_common.mode)
)
5280 != CODE_FOR_nothing)
5281 {
5282 /* Extract sub-vectors directly once vec_extract becomes
5283 a conversion optab. */
5284 dst1 = make_ssa_name (vectype1);
5285 epilog_stmt
5286 = gimple_build_assign (dst1, BIT_FIELD_REF,
5287 build3 (BIT_FIELD_REF, vectype1,
5288 new_temp, TYPE_SIZE (vectype1)((tree_class_check ((vectype1), (tcc_type), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 5288, __FUNCTION__))->type_common.size)
,
5289 bitsize_int (0)size_int_kind (0, stk_bitsizetype)));
5290 gimple_seq_add_stmt_without_update (seq, epilog_stmt);
5291 dst2 = make_ssa_name (vectype1);
5292 epilog_stmt
5293 = gimple_build_assign (dst2, BIT_FIELD_REF,
5294 build3 (BIT_FIELD_REF, vectype1,
5295 new_temp, TYPE_SIZE (vectype1)((tree_class_check ((vectype1), (tcc_type), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 5295, __FUNCTION__))->type_common.size)
,
5296 bitsize_int (bitsize)size_int_kind (bitsize, stk_bitsizetype)));
5297 gimple_seq_add_stmt_without_update (seq, epilog_stmt);
5298 }
5299 else
5300 {
5301 /* Extract via punning to appropriately sized integer mode
5302 vector. */
5303 tree eltype = build_nonstandard_integer_type (bitsize, 1);
5304 tree etype = build_vector_type (eltype, 2);
5305 gcc_assert (convert_optab_handler (vec_extract_optab,((void)(!(convert_optab_handler (vec_extract_optab, ((((enum tree_code
) ((tree_class_check ((etype), (tcc_type), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 5306, __FUNCTION__)))->base.code) == VECTOR_TYPE) ? vector_type_mode
(etype) : (etype)->type_common.mode), ((((enum tree_code)
((tree_class_check ((eltype), (tcc_type), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 5307, __FUNCTION__)))->base.code) == VECTOR_TYPE) ? vector_type_mode
(eltype) : (eltype)->type_common.mode)) != CODE_FOR_nothing
) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 5308, __FUNCTION__), 0 : 0))
5306 TYPE_MODE (etype),((void)(!(convert_optab_handler (vec_extract_optab, ((((enum tree_code
) ((tree_class_check ((etype), (tcc_type), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 5306, __FUNCTION__)))->base.code) == VECTOR_TYPE) ? vector_type_mode
(etype) : (etype)->type_common.mode), ((((enum tree_code)
((tree_class_check ((eltype), (tcc_type), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 5307, __FUNCTION__)))->base.code) == VECTOR_TYPE) ? vector_type_mode
(eltype) : (eltype)->type_common.mode)) != CODE_FOR_nothing
) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 5308, __FUNCTION__), 0 : 0))
5307 TYPE_MODE (eltype))((void)(!(convert_optab_handler (vec_extract_optab, ((((enum tree_code
) ((tree_class_check ((etype), (tcc_type), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 5306, __FUNCTION__)))->base.code) == VECTOR_TYPE) ? vector_type_mode
(etype) : (etype)->type_common.mode), ((((enum tree_code)
((tree_class_check ((eltype), (tcc_type), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 5307, __FUNCTION__)))->base.code) == VECTOR_TYPE) ? vector_type_mode
(eltype) : (eltype)->type_common.mode)) != CODE_FOR_nothing
) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 5308, __FUNCTION__), 0 : 0))
5308 != CODE_FOR_nothing)((void)(!(convert_optab_handler (vec_extract_optab, ((((enum tree_code
) ((tree_class_check ((etype), (tcc_type), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 5306, __FUNCTION__)))->base.code) == VECTOR_TYPE) ? vector_type_mode
(etype) : (etype)->type_common.mode), ((((enum tree_code)
((tree_class_check ((eltype), (tcc_type), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 5307, __FUNCTION__)))->base.code) == VECTOR_TYPE) ? vector_type_mode
(eltype) : (eltype)->type_common.mode)) != CODE_FOR_nothing
) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 5308, __FUNCTION__), 0 : 0))
;
5309 tree tem = make_ssa_name (etype);
5310 epilog_stmt = gimple_build_assign (tem, VIEW_CONVERT_EXPR,
5311 build1 (VIEW_CONVERT_EXPR,
5312 etype, new_temp));
5313 gimple_seq_add_stmt_without_update (seq, epilog_stmt);
5314 new_temp = tem;
5315 tem = make_ssa_name (eltype);
5316 epilog_stmt
5317 = gimple_build_assign (tem, BIT_FIELD_REF,
5318 build3 (BIT_FIELD_REF, eltype,
5319 new_temp, TYPE_SIZE (eltype)((tree_class_check ((eltype), (tcc_type), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 5319, __FUNCTION__))->type_common.size)
,
5320 bitsize_int (0)size_int_kind (0, stk_bitsizetype)));
5321 gimple_seq_add_stmt_without_update (seq, epilog_stmt);
5322 dst1 = make_ssa_name (vectype1);
5323 epilog_stmt = gimple_build_assign (dst1, VIEW_CONVERT_EXPR,
5324 build1 (VIEW_CONVERT_EXPR,
5325 vectype1, tem));
5326 gimple_seq_add_stmt_without_update (seq, epilog_stmt);
5327 tem = make_ssa_name (eltype);
5328 epilog_stmt
5329 = gimple_build_assign (tem, BIT_FIELD_REF,
5330 build3 (BIT_FIELD_REF, eltype,
5331 new_temp, TYPE_SIZE (eltype)((tree_class_check ((eltype), (tcc_type), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 5331, __FUNCTION__))->type_common.size)
,
5332 bitsize_int (bitsize)size_int_kind (bitsize, stk_bitsizetype)));
5333 gimple_seq_add_stmt_without_update (seq, epilog_stmt);
5334 dst2 = make_ssa_name (vectype1);
5335 epilog_stmt = gimple_build_assign (dst2, VIEW_CONVERT_EXPR,
5336 build1 (VIEW_CONVERT_EXPR,
5337 vectype1, tem));
5338 gimple_seq_add_stmt_without_update (seq, epilog_stmt);
5339 }
5340
5341 new_temp = gimple_build (seq, code, vectype1, dst1, dst2);
5342 }
5343
5344 return new_temp;
5345}
5346
5347/* Function vect_create_epilog_for_reduction
5348
5349 Create code at the loop-epilog to finalize the result of a reduction
5350 computation.
5351
5352 STMT_INFO is the scalar reduction stmt that is being vectorized.
5353 SLP_NODE is an SLP node containing a group of reduction statements. The
5354 first one in this group is STMT_INFO.
5355 SLP_NODE_INSTANCE is the SLP node instance containing SLP_NODE
5356 REDUC_INDEX says which rhs operand of the STMT_INFO is the reduction phi
5357 (counting from 0)
5358
5359 This function:
5360 1. Completes the reduction def-use cycles.
5361 2. "Reduces" each vector of partial results VECT_DEFS into a single result,
5362 by calling the function specified by REDUC_FN if available, or by
5363 other means (whole-vector shifts or a scalar loop).
5364 The function also creates a new phi node at the loop exit to preserve
5365 loop-closed form, as illustrated below.
5366
5367 The flow at the entry to this function:
5368
5369 loop:
5370 vec_def = phi <vec_init, null> # REDUCTION_PHI
5371 VECT_DEF = vector_stmt # vectorized form of STMT_INFO
5372 s_loop = scalar_stmt # (scalar) STMT_INFO
5373 loop_exit:
5374 s_out0 = phi <s_loop> # (scalar) EXIT_PHI
5375 use <s_out0>
5376 use <s_out0>
5377
5378 The above is transformed by this function into:
5379
5380 loop:
5381 vec_def = phi <vec_init, VECT_DEF> # REDUCTION_PHI
5382 VECT_DEF = vector_stmt # vectorized form of STMT_INFO
5383 s_loop = scalar_stmt # (scalar) STMT_INFO
5384 loop_exit:
5385 s_out0 = phi <s_loop> # (scalar) EXIT_PHI
5386 v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI
5387 v_out2 = reduce <v_out1>
5388 s_out3 = extract_field <v_out2, 0>
5389 s_out4 = adjust_result <s_out3>
5390 use <s_out4>
5391 use <s_out4>
5392*/
5393
5394static void
5395vect_create_epilog_for_reduction (loop_vec_info loop_vinfo,
5396 stmt_vec_info stmt_info,
5397 slp_tree slp_node,
5398 slp_instance slp_node_instance)
5399{
5400 stmt_vec_info reduc_info = info_for_reduction (loop_vinfo, stmt_info);
5401 gcc_assert (reduc_info->is_reduc_info)((void)(!(reduc_info->is_reduc_info) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 5401, __FUNCTION__), 0 : 0))
;
5402 /* For double reductions we need to get at the inner loop reduction
5403 stmt which has the meta info attached. Our stmt_info is that of the
5404 loop-closed PHI of the inner loop which we remember as
5405 def for the reduction PHI generation. */
5406 bool double_reduc = false;
5407 stmt_vec_info rdef_info = stmt_info;
5408 if (STMT_VINFO_DEF_TYPE (stmt_info)(stmt_info)->def_type == vect_double_reduction_def)
5409 {
5410 gcc_assert (!slp_node)((void)(!(!slp_node) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 5410, __FUNCTION__), 0 : 0))
;
5411 double_reduc = true;
5412 stmt_info = loop_vinfo->lookup_def (gimple_phi_arg_def
5413 (stmt_info->stmt, 0));
5414 stmt_info = vect_stmt_to_vectorize (stmt_info);
5415 }
5416 gphi *reduc_def_stmt
5417 = as_a <gphi *> (STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info))(vect_orig_stmt (stmt_info))->reduc_def->stmt);
5418 code_helper code = STMT_VINFO_REDUC_CODE (reduc_info)(reduc_info)->reduc_code;
5419 internal_fn reduc_fn = STMT_VINFO_REDUC_FN (reduc_info)(reduc_info)->reduc_fn;
5420 tree vectype;
5421 machine_mode mode;
5422 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo)(loop_vinfo)->loop, *outer_loop = NULLnullptr;
5423 basic_block exit_bb;
5424 tree scalar_dest;
5425 tree scalar_type;
5426 gimple *new_phi = NULLnullptr, *phi;
5427 gimple_stmt_iterator exit_gsi;
5428 tree new_temp = NULL_TREE(tree) nullptr, new_name, new_scalar_dest;
5429 gimple *epilog_stmt = NULLnullptr;
5430 gimple *exit_phi;
5431 tree bitsize;
5432 tree def;
5433 tree orig_name, scalar_result;
5434 imm_use_iterator imm_iter, phi_imm_iter;
5435 use_operand_p use_p, phi_use_p;
5436 gimple *use_stmt;
5437 auto_vec<tree> reduc_inputs;
5438 int j, i;
5439 vec<tree> &scalar_results = reduc_info->reduc_scalar_results;
5440 unsigned int group_size = 1, k;
5441 auto_vec<gimple *> phis;
5442 /* SLP reduction without reduction chain, e.g.,
5443 # a1 = phi <a2, a0>
5444 # b1 = phi <b2, b0>
5445 a2 = operation (a1)
5446 b2 = operation (b1) */
5447 bool slp_reduc = (slp_node && !REDUC_GROUP_FIRST_ELEMENT (stmt_info)(((void)(!(!(stmt_info)->dr_aux.dr) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 5447, __FUNCTION__), 0 : 0)), (stmt_info)->first_element
)
);
5448 bool direct_slp_reduc;
5449 tree induction_index = NULL_TREE(tree) nullptr;
5450
5451 if (slp_node)
5452 group_size = SLP_TREE_LANES (slp_node)(slp_node)->lanes;
5453
5454 if (nested_in_vect_loop_p (loop, stmt_info))
5455 {
5456 outer_loop = loop;
5457 loop = loop->inner;
5458 gcc_assert (!slp_node && double_reduc)((void)(!(!slp_node && double_reduc) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 5458, __FUNCTION__), 0 : 0))
;
5459 }
5460
5461 vectype = STMT_VINFO_REDUC_VECTYPE (reduc_info)(reduc_info)->reduc_vectype;
5462 gcc_assert (vectype)((void)(!(vectype) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 5462, __FUNCTION__), 0 : 0))
;
5463 mode = TYPE_MODE (vectype)((((enum tree_code) ((tree_class_check ((vectype), (tcc_type)
, "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 5463, __FUNCTION__)))->base.code) == VECTOR_TYPE) ? vector_type_mode
(vectype) : (vectype)->type_common.mode)
;
5464
5465 tree induc_val = NULL_TREE(tree) nullptr;
5466 tree adjustment_def = NULLnullptr;
5467 if (slp_node)
5468 ;
5469 else
5470 {
5471 /* Optimize: for induction condition reduction, if we can't use zero
5472 for induc_val, use initial_def. */
5473 if (STMT_VINFO_REDUC_TYPE (reduc_info)(reduc_info)->reduc_type == INTEGER_INDUC_COND_REDUCTION)
5474 induc_val = STMT_VINFO_VEC_INDUC_COND_INITIAL_VAL (reduc_info)(reduc_info)->induc_cond_initial_val;
5475 else if (double_reduc)
5476 ;
5477 else
5478 adjustment_def = STMT_VINFO_REDUC_EPILOGUE_ADJUSTMENT (reduc_info)(reduc_info)->reduc_epilogue_adjustment;
5479 }
5480
5481 stmt_vec_info single_live_out_stmt[] = { stmt_info };
5482 array_slice<const stmt_vec_info> live_out_stmts = single_live_out_stmt;
5483 if (slp_reduc)
5484 /* All statements produce live-out values. */
5485 live_out_stmts = SLP_TREE_SCALAR_STMTS (slp_node)(slp_node)->stmts;
5486 else if (slp_node)
5487 {
5488 /* The last statement in the reduction chain produces the live-out
5489 value. Note SLP optimization can shuffle scalar stmts to
5490 optimize permutations so we have to search for the last stmt. */
5491 for (k = 0; k < group_size; ++k)
5492 if (!REDUC_GROUP_NEXT_ELEMENT (SLP_TREE_SCALAR_STMTS (slp_node)[k])(((void)(!(!((slp_node)->stmts[k])->dr_aux.dr) ? fancy_abort
("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 5492, __FUNCTION__), 0 : 0)), ((slp_node)->stmts[k])->
next_element)
)
5493 {
5494 single_live_out_stmt[0] = SLP_TREE_SCALAR_STMTS (slp_node)(slp_node)->stmts[k];
5495 break;
5496 }
5497 }
5498
5499 unsigned vec_num;
5500 int ncopies;
5501 if (slp_node)
5502 {
5503 vec_num = SLP_TREE_VEC_STMTS (slp_node_instance->reduc_phis)(slp_node_instance->reduc_phis)->vec_stmts.length ();
5504 ncopies = 1;
5505 }
5506 else
5507 {
5508 stmt_vec_info reduc_info = loop_vinfo->lookup_stmt (reduc_def_stmt);
5509 vec_num = 1;
5510 ncopies = STMT_VINFO_VEC_STMTS (reduc_info)(reduc_info)->vec_stmts.length ();
5511 }
5512
5513 /* For cond reductions we want to create a new vector (INDEX_COND_EXPR)
5514 which is updated with the current index of the loop for every match of
5515 the original loop's cond_expr (VEC_STMT). This results in a vector
5516 containing the last time the condition passed for that vector lane.
5517 The first match will be a 1 to allow 0 to be used for non-matching
5518 indexes. If there are no matches at all then the vector will be all
5519 zeroes.
5520
5521 PR92772: This algorithm is broken for architectures that support
5522 masked vectors, but do not provide fold_extract_last. */
5523 if (STMT_VINFO_REDUC_TYPE (reduc_info)(reduc_info)->reduc_type == COND_REDUCTION)
5524 {
5525 auto_vec<std::pair<tree, bool>, 2> ccompares;
5526 stmt_vec_info cond_info = STMT_VINFO_REDUC_DEF (reduc_info)(reduc_info)->reduc_def;
5527 cond_info = vect_stmt_to_vectorize (cond_info);
5528 while (cond_info != reduc_info)
5529 {
5530 if (gimple_assign_rhs_code (cond_info->stmt) == COND_EXPR)
5531 {
5532 gimple *vec_stmt = STMT_VINFO_VEC_STMTS (cond_info)(cond_info)->vec_stmts[0];
5533 gcc_assert (gimple_assign_rhs_code (vec_stmt) == VEC_COND_EXPR)((void)(!(gimple_assign_rhs_code (vec_stmt) == VEC_COND_EXPR)
? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 5533, __FUNCTION__), 0 : 0))
;
5534 ccompares.safe_push
5535 (std::make_pair (unshare_expr (gimple_assign_rhs1 (vec_stmt)),
5536 STMT_VINFO_REDUC_IDX (cond_info)(cond_info)->reduc_idx == 2));
5537 }
5538 cond_info
5539 = loop_vinfo->lookup_def (gimple_op (cond_info->stmt,
5540 1 + STMT_VINFO_REDUC_IDX(cond_info)->reduc_idx
5541 (cond_info)(cond_info)->reduc_idx));
5542 cond_info = vect_stmt_to_vectorize (cond_info);
5543 }
5544 gcc_assert (ccompares.length () != 0)((void)(!(ccompares.length () != 0) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 5544, __FUNCTION__), 0 : 0))
;
5545
5546 tree indx_before_incr, indx_after_incr;
5547 poly_uint64 nunits_out = TYPE_VECTOR_SUBPARTS (vectype);
5548 int scalar_precision
5549 = GET_MODE_PRECISION (SCALAR_TYPE_MODE (TREE_TYPE (vectype))(as_a <scalar_mode> ((tree_class_check ((((contains_struct_check
((vectype), (TS_TYPED), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 5549, __FUNCTION__))->typed.type)), (tcc_type), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 5549, __FUNCTION__))->type_common.mode))
);
5550 tree cr_index_scalar_type = make_unsigned_type (scalar_precision);
5551 tree cr_index_vector_type = get_related_vectype_for_scalar_type
5552 (TYPE_MODE (vectype)((((enum tree_code) ((tree_class_check ((vectype), (tcc_type)
, "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 5552, __FUNCTION__)))->base.code) == VECTOR_TYPE) ? vector_type_mode
(vectype) : (vectype)->type_common.mode)
, cr_index_scalar_type,
5553 TYPE_VECTOR_SUBPARTS (vectype));
5554
5555 /* First we create a simple vector induction variable which starts
5556 with the values {1,2,3,...} (SERIES_VECT) and increments by the
5557 vector size (STEP). */
5558
5559 /* Create a {1,2,3,...} vector. */
5560 tree series_vect = build_index_vector (cr_index_vector_type, 1, 1);
5561
5562 /* Create a vector of the step value. */
5563 tree step = build_int_cst (cr_index_scalar_type, nunits_out);
5564 tree vec_step = build_vector_from_val (cr_index_vector_type, step);
5565
5566 /* Create an induction variable. */
5567 gimple_stmt_iterator incr_gsi;
5568 bool insert_after;
5569 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
5570 create_iv (series_vect, vec_step, NULL_TREE(tree) nullptr, loop, &incr_gsi,
5571 insert_after, &indx_before_incr, &indx_after_incr);
5572
5573 /* Next create a new phi node vector (NEW_PHI_TREE) which starts
5574 filled with zeros (VEC_ZERO). */
5575
5576 /* Create a vector of 0s. */
5577 tree zero = build_zero_cst (cr_index_scalar_type);
5578 tree vec_zero = build_vector_from_val (cr_index_vector_type, zero);
5579
5580 /* Create a vector phi node. */
5581 tree new_phi_tree = make_ssa_name (cr_index_vector_type);
5582 new_phi = create_phi_node (new_phi_tree, loop->header);
5583 add_phi_arg (as_a <gphi *> (new_phi), vec_zero,
5584 loop_preheader_edge (loop), UNKNOWN_LOCATION((location_t) 0));
5585
5586 /* Now take the condition from the loops original cond_exprs
5587 and produce a new cond_exprs (INDEX_COND_EXPR) which for
5588 every match uses values from the induction variable
5589 (INDEX_BEFORE_INCR) otherwise uses values from the phi node
5590 (NEW_PHI_TREE).
5591 Finally, we update the phi (NEW_PHI_TREE) to take the value of
5592 the new cond_expr (INDEX_COND_EXPR). */
5593 gimple_seq stmts = NULLnullptr;
5594 for (int i = ccompares.length () - 1; i != -1; --i)
5595 {
5596 tree ccompare = ccompares[i].first;
5597 if (ccompares[i].second)
5598 new_phi_tree = gimple_build (&stmts, VEC_COND_EXPR,
5599 cr_index_vector_type,
5600 ccompare,
5601 indx_before_incr, new_phi_tree);
5602 else
5603 new_phi_tree = gimple_build (&stmts, VEC_COND_EXPR,
5604 cr_index_vector_type,
5605 ccompare,
5606 new_phi_tree, indx_before_incr);
5607 }
5608 gsi_insert_seq_before (&incr_gsi, stmts, GSI_SAME_STMT);
5609
5610 /* Update the phi with the vec cond. */
5611 induction_index = new_phi_tree;
5612 add_phi_arg (as_a <gphi *> (new_phi), induction_index,
5613 loop_latch_edge (loop), UNKNOWN_LOCATION((location_t) 0));
5614 }
5615
5616 /* 2. Create epilog code.
5617 The reduction epilog code operates across the elements of the vector
5618 of partial results computed by the vectorized loop.
5619 The reduction epilog code consists of:
5620
5621 step 1: compute the scalar result in a vector (v_out2)
5622 step 2: extract the scalar result (s_out3) from the vector (v_out2)
5623 step 3: adjust the scalar result (s_out3) if needed.
5624
5625 Step 1 can be accomplished using one the following three schemes:
5626 (scheme 1) using reduc_fn, if available.
5627 (scheme 2) using whole-vector shifts, if available.
5628 (scheme 3) using a scalar loop. In this case steps 1+2 above are
5629 combined.
5630
5631 The overall epilog code looks like this:
5632
5633 s_out0 = phi <s_loop> # original EXIT_PHI
5634 v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI
5635 v_out2 = reduce <v_out1> # step 1
5636 s_out3 = extract_field <v_out2, 0> # step 2
5637 s_out4 = adjust_result <s_out3> # step 3
5638
5639 (step 3 is optional, and steps 1 and 2 may be combined).
5640 Lastly, the uses of s_out0 are replaced by s_out4. */
5641
5642
5643 /* 2.1 Create new loop-exit-phis to preserve loop-closed form:
5644 v_out1 = phi <VECT_DEF>
5645 Store them in NEW_PHIS. */
5646 if (double_reduc)
5647 loop = outer_loop;
5648 exit_bb = single_exit (loop)->dest;
5649 exit_gsi = gsi_after_labels (exit_bb);
5650 reduc_inputs.create (slp_node ? vec_num : ncopies);
5651 for (unsigned i = 0; i < vec_num; i++)
5652 {
5653 gimple_seq stmts = NULLnullptr;
5654 if (slp_node)
5655 def = vect_get_slp_vect_def (slp_node, i);
5656 else
5657 def = gimple_get_lhs (STMT_VINFO_VEC_STMTS (rdef_info)(rdef_info)->vec_stmts[0]);
5658 for (j = 0; j < ncopies; j++)
5659 {
5660 tree new_def = copy_ssa_name (def);
5661 phi = create_phi_node (new_def, exit_bb);
5662 if (j)
5663 def = gimple_get_lhs (STMT_VINFO_VEC_STMTS (rdef_info)(rdef_info)->vec_stmts[j]);
5664 SET_PHI_ARG_DEF (phi, single_exit (loop)->dest_idx, def)set_ssa_use_from_ptr (gimple_phi_arg_imm_use_ptr (((phi)), ((
single_exit (loop)->dest_idx))), (def))
;
5665 new_def = gimple_convert (&stmts, vectype, new_def);
5666 reduc_inputs.quick_push (new_def);
5667 }
5668 gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT);
5669 }
5670
5671 /* 2.2 Get the relevant tree-code to use in the epilog for schemes 2,3
5672 (i.e. when reduc_fn is not available) and in the final adjustment
5673 code (if needed). Also get the original scalar reduction variable as
5674 defined in the loop. In case STMT is a "pattern-stmt" (i.e. - it
5675 represents a reduction pattern), the tree-code and scalar-def are
5676 taken from the original stmt that the pattern-stmt (STMT) replaces.
5677 Otherwise (it is a regular reduction) - the tree-code and scalar-def
5678 are taken from STMT. */
5679
5680 stmt_vec_info orig_stmt_info = vect_orig_stmt (stmt_info);
5681 if (orig_stmt_info != stmt_info)
5682 {
5683 /* Reduction pattern */
5684 gcc_assert (STMT_VINFO_IN_PATTERN_P (orig_stmt_info))((void)(!((orig_stmt_info)->in_pattern_p) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 5684, __FUNCTION__), 0 : 0))
;
5685 gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info) == stmt_info)((void)(!((orig_stmt_info)->related_stmt == stmt_info) ? fancy_abort
("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 5685, __FUNCTION__), 0 : 0))
;
5686 }
5687
5688 scalar_dest = gimple_get_lhs (orig_stmt_info->stmt);
5689 scalar_type = TREE_TYPE (scalar_dest)((contains_struct_check ((scalar_dest), (TS_TYPED), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 5689, __FUNCTION__))->typed.type)
;
5690 scalar_results.truncate (0);
5691 scalar_results.reserve_exact (group_size);
5692 new_scalar_dest = vect_create_destination_var (scalar_dest, NULLnullptr);
5693 bitsize = TYPE_SIZE (scalar_type)((tree_class_check ((scalar_type), (tcc_type), "/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 5693, __FUNCTION__))->type_common.size)
;
5694
5695 /* True if we should implement SLP_REDUC using native reduction operations
5696 instead of scalar operations. */
5697 direct_slp_reduc = (reduc_fn != IFN_LAST
5698 && slp_reduc
5699 && !TYPE_VECTOR_SUBPARTS (vectype).is_constant ());
5700
5701 /* In case of reduction chain, e.g.,
5702 # a1 = phi <a3, a0>
5703 a2 = operation (a1)
5704 a3 = operation (a2),
5705
5706 we may end up with more than one vector result. Here we reduce them
5707 to one vector.
5708
5709 The same is true for a SLP reduction, e.g.,
5710 # a1 = phi <a2, a0>
5711 # b1 = phi <b2, b0>
5712 a2 = operation (a1)
5713 b2 = operation (a2),
5714
5715 where we can end up with more than one vector as well. We can
5716 easily accumulate vectors when the number of vector elements is
5717 a multiple of the SLP group size.
5718
5719 The same is true if we couldn't use a single defuse cycle. */
5720 if (REDUC_GROUP_FIRST_ELEMENT (stmt_info)(((void)(!(!(stmt_info)->dr_aux.dr) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.cc"
, 5720, __FUNCTION__), 0 : 0)), (stmt_info)->first_element
)
5721 || direct_slp_reduc
5722 || (slp_reduc
5723 && constant_multiple_p (TYPE_VECTOR_SUBPARTS (vectype), group_size))
5724 || ncopies > 1)
5725 {
5726 gimple_seq stmts = NULLnullptr;
5727 tree single_input = reduc_inputs[0];
5728 for (k = 1; k < reduc_inputs.length (); k++)
5729 single_input = gimple_build (&stmts, code, vectype,
5730 single_input, reduc_inputs[k]);
5731 gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT);
5732
5733 reduc_inputs.truncate (0);
5734 reduc_inputs.safe_push (single_input);
5735 }
5736
5737 tree orig_reduc_input = reduc_inputs[0];
5738
5739 /* If this loop is an epilogue loop that can be skipped after the
5740