Bug Summary

File: build/gcc/tree-vect-loop.c
Warning: line 6475, column 5
Value stored to 'stmt_info' is never read
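
This is the pattern the deadcode.DeadStores checker reports: a value is written to a variable but never read before the variable is overwritten or goes out of scope. A minimal, hypothetical illustration (not the GCC code in question) is:

  int
  example (int n)
  {
    int x = n;          /* this store is read below */
    int result = x + 1;
    x = 2;              /* flagged: value stored to 'x' is never read */
    return result;
  }

In the listing below, the flagged store is the assignment to 'stmt_info' at line 6475.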

Annotated Source Code


clang -cc1 -cc1 -triple x86_64-unknown-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name tree-vect-loop.c -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model static -mframe-pointer=none -fmath-errno -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -fcoverage-compilation-dir=/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/objdir/gcc -resource-dir /usr/lib64/clang/13.0.0 -D IN_GCC -D HAVE_CONFIG_H -I . -I . -I /home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc -I /home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/. -I /home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/../include -I /home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/../libcpp/include -I /home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/../libcody -I /home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/../libdecnumber -I /home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/../libdecnumber/bid -I ../libdecnumber -I /home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/../libbacktrace -internal-isystem /usr/bin/../lib64/gcc/x86_64-suse-linux/11/../../../../include/c++/11 -internal-isystem /usr/bin/../lib64/gcc/x86_64-suse-linux/11/../../../../include/c++/11/x86_64-suse-linux -internal-isystem /usr/bin/../lib64/gcc/x86_64-suse-linux/11/../../../../include/c++/11/backward -internal-isystem /usr/lib64/clang/13.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/bin/../lib64/gcc/x86_64-suse-linux/11/../../../../x86_64-suse-linux/include -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-narrowing -Wwrite-strings -Wno-error=format-diag -Wno-long-long -Wno-variadic-macros -Wno-overlength-strings -fdeprecated-macro -fdebug-compilation-dir=/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/objdir/gcc -ferror-limit 19 -fno-rtti -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -analyzer-output=plist-html -analyzer-config silence-checkers=core.NullDereference -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/objdir/clang-static-analyzer/2021-11-20-133755-20252-1/report-kGBiIJ.plist -x c++ /home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c
1/* Loop Vectorization
2 Copyright (C) 2003-2021 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com> and
4 Ira Rosen <irar@il.ibm.com>
5
6This file is part of GCC.
7
8GCC is free software; you can redistribute it and/or modify it under
9the terms of the GNU General Public License as published by the Free
10Software Foundation; either version 3, or (at your option) any later
11version.
12
13GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14WARRANTY; without even the implied warranty of MERCHANTABILITY or
15FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16for more details.
17
18You should have received a copy of the GNU General Public License
19along with GCC; see the file COPYING3. If not see
20<http://www.gnu.org/licenses/>. */
21
22#define INCLUDE_ALGORITHM
23#include "config.h"
24#include "system.h"
25#include "coretypes.h"
26#include "backend.h"
27#include "target.h"
28#include "rtl.h"
29#include "tree.h"
30#include "gimple.h"
31#include "cfghooks.h"
32#include "tree-pass.h"
33#include "ssa.h"
34#include "optabs-tree.h"
35#include "diagnostic-core.h"
36#include "fold-const.h"
37#include "stor-layout.h"
38#include "cfganal.h"
39#include "gimplify.h"
40#include "gimple-iterator.h"
41#include "gimplify-me.h"
42#include "tree-ssa-loop-ivopts.h"
43#include "tree-ssa-loop-manip.h"
44#include "tree-ssa-loop-niter.h"
45#include "tree-ssa-loop.h"
46#include "cfgloop.h"
47#include "tree-scalar-evolution.h"
48#include "tree-vectorizer.h"
49#include "gimple-fold.h"
50#include "cgraph.h"
51#include "tree-cfg.h"
52#include "tree-if-conv.h"
53#include "internal-fn.h"
54#include "tree-vector-builder.h"
55#include "vec-perm-indices.h"
56#include "tree-eh.h"
57
58/* Loop Vectorization Pass.
59
60 This pass tries to vectorize loops.
61
62 For example, the vectorizer transforms the following simple loop:
63
64 short a[N]; short b[N]; short c[N]; int i;
65
66 for (i=0; i<N; i++){
67 a[i] = b[i] + c[i];
68 }
69
70 as if it was manually vectorized by rewriting the source code into:
71
72 typedef int __attribute__((mode(V8HI))) v8hi;
73 short a[N]; short b[N]; short c[N]; int i;
74 v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c;
75 v8hi va, vb, vc;
76
77 for (i=0; i<N/8; i++){
78 vb = pb[i];
79 vc = pc[i];
80 va = vb + vc;
81 pa[i] = va;
82 }
83
84 The main entry to this pass is vectorize_loops(), in which
85 the vectorizer applies a set of analyses on a given set of loops,
86 followed by the actual vectorization transformation for the loops that
87 had successfully passed the analysis phase.
88 Throughout this pass we make a distinction between two types of
89 data: scalars (which are represented by SSA_NAMES), and memory references
90 ("data-refs"). These two types of data require different handling both
91 during analysis and transformation. The types of data-refs that the
92 vectorizer currently supports are ARRAY_REFS which base is an array DECL
93 (not a pointer), and INDIRECT_REFS through pointers; both array and pointer
94 accesses are required to have a simple (consecutive) access pattern.
95
96 Analysis phase:
97 ===============
98 The driver for the analysis phase is vect_analyze_loop().
99 It applies a set of analyses, some of which rely on the scalar evolution
100 analyzer (scev) developed by Sebastian Pop.
101
102 During the analysis phase the vectorizer records some information
103 per stmt in a "stmt_vec_info" struct which is attached to each stmt in the
104 loop, as well as general information about the loop as a whole, which is
105 recorded in a "loop_vec_info" struct attached to each loop.
106
107 Transformation phase:
108 =====================
109 The loop transformation phase scans all the stmts in the loop, and
110 creates a vector stmt (or a sequence of stmts) for each scalar stmt S in
111 the loop that needs to be vectorized. It inserts the vector code sequence
112 just before the scalar stmt S, and records a pointer to the vector code
113 in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct
114 attached to S). This pointer will be used for the vectorization of following
115 stmts which use the def of stmt S. Stmt S is removed if it writes to memory;
116 otherwise, we rely on dead code elimination for removing it.
117
118 For example, say stmt S1 was vectorized into stmt VS1:
119
120 VS1: vb = px[i];
121 S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
122 S2: a = b;
123
124 To vectorize stmt S2, the vectorizer first finds the stmt that defines
125 the operand 'b' (S1), and gets the relevant vector def 'vb' from the
126 vector stmt VS1 pointed to by STMT_VINFO_VEC_STMT (stmt_info (S1)). The
127 resulting sequence would be:
128
129 VS1: vb = px[i];
130 S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
131 VS2: va = vb;
132 S2: a = b; STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2
133
134 Operands that are not SSA_NAMEs, are data-refs that appear in
135 load/store operations (like 'x[i]' in S1), and are handled differently.
136
137 Target modeling:
138 =================
139 Currently the only target specific information that is used is the
140 size of the vector (in bytes) - "TARGET_VECTORIZE_UNITS_PER_SIMD_WORD".
141 Targets that can support different sizes of vectors, for now will need
142 to specify one value for "TARGET_VECTORIZE_UNITS_PER_SIMD_WORD". More
143 flexibility will be added in the future.
144
145 Since we only vectorize operations which vector form can be
146 expressed using existing tree codes, to verify that an operation is
147 supported, the vectorizer checks the relevant optab at the relevant
148 machine_mode (e.g, optab_handler (add_optab, V8HImode)). If
149 the value found is CODE_FOR_nothing, then there's no target support, and
150 we can't vectorize the stmt.
151
152 For additional information on this project see:
153 http://gcc.gnu.org/projects/tree-ssa/vectorization.html
154*/
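
As an aside to the target-modeling paragraph in the comment above, the optab support check it describes could be pictured as a small helper; this sketch is illustrative only and is not part of tree-vect-loop.c:

  /* Illustrative only: true if the target provides an instruction for
     vector addition in V8HImode, following the optab check described
     in the comment above.  */
  static bool
  example_target_supports_v8hi_add_p (void)
  {
    return optab_handler (add_optab, V8HImode) != CODE_FOR_nothing;
  }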
155
156static void vect_estimate_min_profitable_iters (loop_vec_info, int *, int *);
157static stmt_vec_info vect_is_simple_reduction (loop_vec_info, stmt_vec_info,
158 bool *, bool *);
159
160/* Subroutine of vect_determine_vf_for_stmt that handles only one
161 statement. VECTYPE_MAYBE_SET_P is true if STMT_VINFO_VECTYPE
162 may already be set for general statements (not just data refs). */
163
164static opt_result
165vect_determine_vf_for_stmt_1 (vec_info *vinfo, stmt_vec_info stmt_info,
166 bool vectype_maybe_set_p,
167 poly_uint64 *vf)
168{
169 gimple *stmt = stmt_info->stmt;
170
171 if ((!STMT_VINFO_RELEVANT_P (stmt_info)
172 && !STMT_VINFO_LIVE_P (stmt_info))
173 || gimple_clobber_p (stmt))
174 {
175 if (dump_enabled_p ())
176 dump_printf_loc (MSG_NOTE, vect_location, "skip.\n");
177 return opt_result::success ();
178 }
179
180 tree stmt_vectype, nunits_vectype;
181 opt_result res = vect_get_vector_types_for_stmt (vinfo, stmt_info,
182 &stmt_vectype,
183 &nunits_vectype);
184 if (!res)
185 return res;
186
187 if (stmt_vectype)
188 {
189 if (STMT_VINFO_VECTYPE (stmt_info))
190 /* The only case when a vectype had been already set is for stmts
191 that contain a data ref, or for "pattern-stmts" (stmts generated
192 by the vectorizer to represent/replace a certain idiom). */
193 gcc_assert ((STMT_VINFO_DATA_REF (stmt_info)
194 || vectype_maybe_set_p)
195 && STMT_VINFO_VECTYPE (stmt_info) == stmt_vectype);
196 else
197 STMT_VINFO_VECTYPE (stmt_info) = stmt_vectype;
198 }
199
200 if (nunits_vectype)
201 vect_update_max_nunits (vf, nunits_vectype);
202
203 return opt_result::success ();
204}
205
206/* Subroutine of vect_determine_vectorization_factor. Set the vector
207 types of STMT_INFO and all attached pattern statements and update
208 the vectorization factor VF accordingly. Return true on success
209 or false if something prevented vectorization. */
210
211static opt_result
212vect_determine_vf_for_stmt (vec_info *vinfo,
213 stmt_vec_info stmt_info, poly_uint64 *vf)
214{
215 if (dump_enabled_p ())
216 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: %G",
217 stmt_info->stmt);
218 opt_result res = vect_determine_vf_for_stmt_1 (vinfo, stmt_info, false, vf);
219 if (!res)
220 return res;
221
222 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
223 && STMT_VINFO_RELATED_STMT (stmt_info))
224 {
225 gimple *pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info);
226 stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
227
228 /* If a pattern statement has def stmts, analyze them too. */
229 for (gimple_stmt_iterator si = gsi_start (pattern_def_seq);
230 !gsi_end_p (si); gsi_next (&si))
231 {
232 stmt_vec_info def_stmt_info = vinfo->lookup_stmt (gsi_stmt (si));
233 if (dump_enabled_p ())
234 dump_printf_loc (MSG_NOTE, vect_location,
235 "==> examining pattern def stmt: %G",
236 def_stmt_info->stmt);
237 res = vect_determine_vf_for_stmt_1 (vinfo, def_stmt_info, true, vf);
238 if (!res)
239 return res;
240 }
241
242 if (dump_enabled_p ())
243 dump_printf_loc (MSG_NOTE, vect_location,
244 "==> examining pattern statement: %G",
245 stmt_info->stmt);
246 res = vect_determine_vf_for_stmt_1 (vinfo, stmt_info, true, vf);
247 if (!res)
248 return res;
249 }
250
251 return opt_result::success ();
252}
253
254/* Function vect_determine_vectorization_factor
255
256 Determine the vectorization factor (VF). VF is the number of data elements
257 that are operated upon in parallel in a single iteration of the vectorized
258 loop. For example, when vectorizing a loop that operates on 4byte elements,
259 on a target with vector size (VS) 16byte, the VF is set to 4, since 4
260 elements can fit in a single vector register.
261
262 We currently support vectorization of loops in which all types operated upon
263 are of the same size. Therefore this function currently sets VF according to
264 the size of the types operated upon, and fails if there are multiple sizes
265 in the loop.
266
267 VF is also the factor by which the loop iterations are strip-mined, e.g.:
268 original loop:
269 for (i=0; i<N; i++){
270 a[i] = b[i] + c[i];
271 }
272
273 vectorized loop:
274 for (i=0; i<N; i+=VF){
275 a[i:VF] = b[i:VF] + c[i:VF];
276 }
277*/
278
279static opt_result
280vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
281{
282 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
283 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
284 unsigned nbbs = loop->num_nodes;
285 poly_uint64 vectorization_factor = 1;
286 tree scalar_type = NULL_TREE;
287 gphi *phi;
288 tree vectype;
289 stmt_vec_info stmt_info;
290 unsigned i;
291
292 DUMP_VECT_SCOPE ("vect_determine_vectorization_factor");
293
294 for (i = 0; i < nbbs; i++)
295 {
296 basic_block bb = bbs[i];
297
298 for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si);
299 gsi_next (&si))
300 {
301 phi = si.phi ();
302 stmt_info = loop_vinfo->lookup_stmt (phi);
303 if (dump_enabled_p ())
304 dump_printf_loc (MSG_NOTE, vect_location, "==> examining phi: %G",
305 phi);
306
307 gcc_assert (stmt_info);
308
309 if (STMT_VINFO_RELEVANT_P (stmt_info)
310 || STMT_VINFO_LIVE_P (stmt_info))
311 {
312 gcc_assert (!STMT_VINFO_VECTYPE (stmt_info));
313 scalar_type = TREE_TYPE (PHI_RESULT (phi));
314
315 if (dump_enabled_p ())
316 dump_printf_loc (MSG_NOTE, vect_location,
317 "get vectype for scalar type: %T\n",
318 scalar_type);
319
320 vectype = get_vectype_for_scalar_type (loop_vinfo, scalar_type);
321 if (!vectype)
322 return opt_result::failure_at (phi,
323 "not vectorized: unsupported "
324 "data-type %T\n",
325 scalar_type);
326 STMT_VINFO_VECTYPE (stmt_info) = vectype;
327
328 if (dump_enabled_p ())
329 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n",
330 vectype);
331
332 if (dump_enabled_p ())
333 {
334 dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
335 dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (vectype));
336 dump_printf (MSG_NOTE, "\n");
337 }
338
339 vect_update_max_nunits (&vectorization_factor, vectype);
340 }
341 }
342
343 for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si);
344 gsi_next (&si))
345 {
346 if (is_gimple_debug (gsi_stmt (si)))
347 continue;
348 stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
349 opt_result res
350 = vect_determine_vf_for_stmt (loop_vinfo,
351 stmt_info, &vectorization_factor);
352 if (!res)
353 return res;
354 }
355 }
356
357 /* TODO: Analyze cost. Decide if worth while to vectorize. */
358 if (dump_enabled_p ())
359 {
360 dump_printf_loc (MSG_NOTE, vect_location, "vectorization factor = ");
361 dump_dec (MSG_NOTE, vectorization_factor);
362 dump_printf (MSG_NOTE, "\n");
363 }
364
365 if (known_le (vectorization_factor, 1U))
366 return opt_result::failure_at (vect_location,
367 "not vectorized: unsupported data-type\n");
368 LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
369 return opt_result::success ();
370}
371
372
373/* Function vect_is_simple_iv_evolution.
374
375 FORNOW: A simple evolution of an induction variables in the loop is
376 considered a polynomial evolution. */
377
378static bool
379vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
380 tree * step)
381{
382 tree init_expr;
383 tree step_expr;
384 tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb);
385 basic_block bb;
386
387 /* When there is no evolution in this loop, the evolution function
388 is not "simple". */
389 if (evolution_part == NULL_TREE)
390 return false;
391
392 /* When the evolution is a polynomial of degree >= 2
393 the evolution function is not "simple". */
394 if (tree_is_chrec (evolution_part))
395 return false;
396
397 step_expr = evolution_part;
398 init_expr = unshare_expr (initial_condition_in_loop_num (access_fn, loop_nb));
399
400 if (dump_enabled_p ())
401 dump_printf_loc (MSG_NOTE, vect_location, "step: %T, init: %T\n",
402 step_expr, init_expr);
403
404 *init = init_expr;
405 *step = step_expr;
406
407 if (TREE_CODE (step_expr) != INTEGER_CST
408 && (TREE_CODE (step_expr) != SSA_NAME
409 || ((bb = gimple_bb (SSA_NAME_DEF_STMT (step_expr)))
410 && flow_bb_inside_loop_p (get_loop (cfun, loop_nb), bb))
411 || (!INTEGRAL_TYPE_P (TREE_TYPE (step_expr))
412 && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr))
413 || !flag_associative_math)))
414 && (TREE_CODE (step_expr) != REAL_CST
415 || !flag_associative_math))
416 {
417 if (dump_enabled_p ())
418 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
419 "step unknown.\n");
420 return false;
421 }
422
423 return true;
424}
425
426/* Return true if PHI, described by STMT_INFO, is the inner PHI in
427 what we are assuming is a double reduction. For example, given
428 a structure like this:
429
430 outer1:
431 x_1 = PHI <x_4(outer2), ...>;
432 ...
433
434 inner:
435 x_2 = PHI <x_1(outer1), ...>;
436 ...
437 x_3 = ...;
438 ...
439
440 outer2:
441 x_4 = PHI <x_3(inner)>;
442 ...
443
444 outer loop analysis would treat x_1 as a double reduction phi and
445 this function would then return true for x_2. */
446
447static bool
448vect_inner_phi_in_double_reduction_p (loop_vec_info loop_vinfo, gphi *phi)
449{
450 use_operand_p use_p;
451 ssa_op_iter op_iter;
452 FOR_EACH_PHI_ARG (use_p, phi, op_iter, SSA_OP_USE)
453 if (stmt_vec_info def_info = loop_vinfo->lookup_def (USE_FROM_PTR (use_p)))
454 if (STMT_VINFO_DEF_TYPE (def_info) == vect_double_reduction_def)
455 return true;
456 return false;
457}
458
459/* Function vect_analyze_scalar_cycles_1.
460
461 Examine the cross iteration def-use cycles of scalar variables
462 in LOOP. LOOP_VINFO represents the loop that is now being
463 considered for vectorization (can be LOOP, or an outer-loop
464 enclosing LOOP). */
465
466static void
467vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, class loop *loop)
468{
469 basic_block bb = loop->header;
470 tree init, step;
471 auto_vec<stmt_vec_info, 64> worklist;
472 gphi_iterator gsi;
473 bool double_reduc, reduc_chain;
474
475 DUMP_VECT_SCOPE ("vect_analyze_scalar_cycles");
476
477 /* First - identify all inductions. Reduction detection assumes that all the
478 inductions have been identified, therefore, this order must not be
479 changed. */
480 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
481 {
482 gphi *phi = gsi.phi ();
483 tree access_fn = NULL;
484 tree def = PHI_RESULT (phi);
485 stmt_vec_info stmt_vinfo = loop_vinfo->lookup_stmt (phi);
486
487 if (dump_enabled_p ())
488 dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: %G", phi);
489
490 /* Skip virtual phi's. The data dependences that are associated with
491 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
492 if (virtual_operand_p (def))
493 continue;
494
495 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_unknown_def_type;
496
497 /* Analyze the evolution function. */
498 access_fn = analyze_scalar_evolution (loop, def);
499 if (access_fn)
500 {
501 STRIP_NOPS (access_fn);
502 if (dump_enabled_p ())
503 dump_printf_loc (MSG_NOTE, vect_location,
504 "Access function of PHI: %T\n", access_fn);
505 STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED (stmt_vinfo)
506 = initial_condition_in_loop_num (access_fn, loop->num);
507 STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_vinfo)
508 = evolution_part_in_loop_num (access_fn, loop->num);
509 }
510
511 if (!access_fn
512 || vect_inner_phi_in_double_reduction_p (loop_vinfo, phi)
513 || !vect_is_simple_iv_evolution (loop->num, access_fn, &init, &step)
514 || (LOOP_VINFO_LOOP (loop_vinfo) != loop
515 && TREE_CODE (step) != INTEGER_CST))
516 {
517 worklist.safe_push (stmt_vinfo);
518 continue;
519 }
520
521 gcc_assert (STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED (stmt_vinfo)
522 != NULL_TREE);
523 gcc_assert (STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_vinfo) != NULL_TREE);
524
525 if (dump_enabled_p ())
526 dump_printf_loc (MSG_NOTE, vect_location, "Detected induction.\n");
527 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_induction_def;
528 }
529
530
531 /* Second - identify all reductions and nested cycles. */
532 while (worklist.length () > 0)
533 {
534 stmt_vec_info stmt_vinfo = worklist.pop ();
535 gphi *phi = as_a <gphi *> (stmt_vinfo->stmt);
536 tree def = PHI_RESULT (phi);
537
538 if (dump_enabled_p ())
539 dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: %G", phi);
540
541 gcc_assert (!virtual_operand_p (def)
542 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_unknown_def_type);
543
544 stmt_vec_info reduc_stmt_info
545 = vect_is_simple_reduction (loop_vinfo, stmt_vinfo, &double_reduc,
546 &reduc_chain);
547 if (reduc_stmt_info)
548 {
549 STMT_VINFO_REDUC_DEF (stmt_vinfo) = reduc_stmt_info;
550 STMT_VINFO_REDUC_DEF (reduc_stmt_info) = stmt_vinfo;
551 if (double_reduc)
552 {
553 if (dump_enabled_p ())
554 dump_printf_loc (MSG_NOTE, vect_location,
555 "Detected double reduction.\n");
556
557 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_double_reduction_def;
558 STMT_VINFO_DEF_TYPE (reduc_stmt_info) = vect_double_reduction_def;
559 }
560 else
561 {
562 if (loop != LOOP_VINFO_LOOP (loop_vinfo))
563 {
564 if (dump_enabled_p ())
565 dump_printf_loc (MSG_NOTE, vect_location,
566 "Detected vectorizable nested cycle.\n");
567
568 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_nested_cycle;
569 }
570 else
571 {
572 if (dump_enabled_p ())
573 dump_printf_loc (MSG_NOTE, vect_location,
574 "Detected reduction.\n");
575
576 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_reduction_def;
577 STMT_VINFO_DEF_TYPE (reduc_stmt_info) = vect_reduction_def;
578 /* Store the reduction cycles for possible vectorization in
579 loop-aware SLP if it was not detected as reduction
580 chain. */
581 if (! reduc_chain)
582 LOOP_VINFO_REDUCTIONS (loop_vinfo).safe_push
583 (reduc_stmt_info);
584 }
585 }
586 }
587 else
588 if (dump_enabled_p ())
589 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
590 "Unknown def-use cycle pattern.\n");
591 }
592}
593
594
595/* Function vect_analyze_scalar_cycles.
596
597 Examine the cross iteration def-use cycles of scalar variables, by
598 analyzing the loop-header PHIs of scalar variables. Classify each
599 cycle as one of the following: invariant, induction, reduction, unknown.
600 We do that for the loop represented by LOOP_VINFO, and also to its
601 inner-loop, if exists.
602 Examples for scalar cycles:
603
604 Example1: reduction:
605
606 loop1:
607 for (i=0; i<N; i++)
608 sum += a[i];
609
610 Example2: induction:
611
612 loop2:
613 for (i=0; i<N; i++)
614 a[i] = i; */
615
616static void
617vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
618{
619 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
620
621 vect_analyze_scalar_cycles_1 (loop_vinfo, loop);
622
623 /* When vectorizing an outer-loop, the inner-loop is executed sequentially.
624 Reductions in such inner-loop therefore have different properties than
625 the reductions in the nest that gets vectorized:
626 1. When vectorized, they are executed in the same order as in the original
627 scalar loop, so we can't change the order of computation when
628 vectorizing them.
629 2. FIXME: Inner-loop reductions can be used in the inner-loop, so the
630 current checks are too strict. */
631
632 if (loop->inner)
633 vect_analyze_scalar_cycles_1 (loop_vinfo, loop->inner);
634}
635
636/* Transfer group and reduction information from STMT_INFO to its
637 pattern stmt. */
638
639static void
640vect_fixup_reduc_chain (stmt_vec_info stmt_info)
641{
642 stmt_vec_info firstp = STMT_VINFO_RELATED_STMT (stmt_info);
643 stmt_vec_info stmtp;
644 gcc_assert (!REDUC_GROUP_FIRST_ELEMENT (firstp)
645 && REDUC_GROUP_FIRST_ELEMENT (stmt_info));
646 REDUC_GROUP_SIZE (firstp) = REDUC_GROUP_SIZE (stmt_info);
647 do
648 {
649 stmtp = STMT_VINFO_RELATED_STMT (stmt_info);
650 gcc_checking_assert (STMT_VINFO_DEF_TYPE (stmtp)
651 == STMT_VINFO_DEF_TYPE (stmt_info));
652 REDUC_GROUP_FIRST_ELEMENT (stmtp) = firstp;
653 stmt_info = REDUC_GROUP_NEXT_ELEMENT (stmt_info);
654 if (stmt_info)
655 REDUC_GROUP_NEXT_ELEMENT (stmtp)
656 = STMT_VINFO_RELATED_STMT (stmt_info);
657 }
658 while (stmt_info);
659}
660
661/* Fixup scalar cycles that now have their stmts detected as patterns. */
662
663static void
664vect_fixup_scalar_cycles_with_patterns (loop_vec_info loop_vinfo)
665{
666 stmt_vec_info first;
667 unsigned i;
668
669 FOR_EACH_VEC_ELT (LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo), i, first)
670 {
671 stmt_vec_info next = REDUC_GROUP_NEXT_ELEMENT (first);
672 while (next)
673 {
674 if ((STMT_VINFO_IN_PATTERN_P (next)
675 != STMT_VINFO_IN_PATTERN_P (first))
676 || STMT_VINFO_REDUC_IDX (vect_stmt_to_vectorize (next)) == -1)
677 break;
678 next = REDUC_GROUP_NEXT_ELEMENT (next);
680 /* If all reduction chain members are well-formed patterns adjust
681 the group to group the pattern stmts instead. */
682 if (! next
683 && STMT_VINFO_REDUC_IDX (vect_stmt_to_vectorize (first)) != -1)
684 {
685 if (STMT_VINFO_IN_PATTERN_P (first))
686 {
687 vect_fixup_reduc_chain (first);
688 LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo)[i]
689 = STMT_VINFO_RELATED_STMT (first);
690 }
691 }
692 /* If not all stmt in the chain are patterns or if we failed
693 to update STMT_VINFO_REDUC_IDX dissolve the chain and handle
694 it as regular reduction instead. */
695 else
696 {
697 stmt_vec_info vinfo = first;
698 stmt_vec_info last = NULL;
699 while (vinfo)
700 {
701 next = REDUC_GROUP_NEXT_ELEMENT (vinfo);
702 REDUC_GROUP_FIRST_ELEMENT (vinfo) = NULL;
703 REDUC_GROUP_NEXT_ELEMENT (vinfo) = NULL;
704 last = vinfo;
705 vinfo = next;
706 }
707 STMT_VINFO_DEF_TYPE (vect_stmt_to_vectorize (first))
708 = vect_internal_def;
709 loop_vinfo->reductions.safe_push (vect_stmt_to_vectorize (last));
710 LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo).unordered_remove (i);
711 --i;
712 }
713 }
714}
715
716/* Function vect_get_loop_niters.
717
718 Determine how many iterations the loop is executed and place it
719 in NUMBER_OF_ITERATIONS. Place the number of latch iterations
720 in NUMBER_OF_ITERATIONSM1. Place the condition under which the
721 niter information holds in ASSUMPTIONS.
722
723 Return the loop exit condition. */
724
725
726static gcond *
727vect_get_loop_niters (class loop *loop, tree *assumptions,
728 tree *number_of_iterations, tree *number_of_iterationsm1)
729{
730 edge exit = single_exit (loop);
731 class tree_niter_desc niter_desc;
732 tree niter_assumptions, niter, may_be_zero;
733 gcond *cond = get_loop_exit_condition (loop);
734
735 *assumptions = boolean_true_node;
736 *number_of_iterationsm1 = chrec_dont_know;
737 *number_of_iterations = chrec_dont_know;
738 DUMP_VECT_SCOPE ("get_loop_niters");
739
740 if (!exit)
741 return cond;
742
743 may_be_zero = NULL_TREE;
744 if (!number_of_iterations_exit_assumptions (loop, exit, &niter_desc, NULL)
745 || chrec_contains_undetermined (niter_desc.niter))
746 return cond;
747
748 niter_assumptions = niter_desc.assumptions;
749 may_be_zero = niter_desc.may_be_zero;
750 niter = niter_desc.niter;
751
752 if (may_be_zero && integer_zerop (may_be_zero))
753 may_be_zero = NULL_TREE;
754
755 if (may_be_zero)
756 {
757 if (COMPARISON_CLASS_P (may_be_zero))
758 {
759 /* Try to combine may_be_zero with assumptions, this can simplify
760 computation of niter expression. */
761 if (niter_assumptions && !integer_nonzerop (niter_assumptions))
762 niter_assumptions = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
763 niter_assumptions,
764 fold_build1 (TRUTH_NOT_EXPR,
765 boolean_type_node,
766 may_be_zero));
767 else
768 niter = fold_build3 (COND_EXPR, TREE_TYPE (niter), may_be_zero,
769 build_int_cst (TREE_TYPE (niter), 0),
770 rewrite_to_non_trapping_overflow (niter));
771
772 may_be_zero = NULL_TREE;
773 }
774 else if (integer_nonzerop (may_be_zero))
775 {
776 *number_of_iterationsm1 = build_int_cst (TREE_TYPE (niter), 0);
777 *number_of_iterations = build_int_cst (TREE_TYPE (niter), 1);
778 return cond;
779 }
780 else
781 return cond;
782 }
783
784 *assumptions = niter_assumptions;
785 *number_of_iterationsm1 = niter;
786
787 /* We want the number of loop header executions which is the number
788 of latch executions plus one.
789 ??? For UINT_MAX latch executions this number overflows to zero
790 for loops like do { n++; } while (n != 0); */
791 if (niter && !chrec_contains_undetermined (niter))
792 niter = fold_build2 (PLUS_EXPR, TREE_TYPE (niter), unshare_expr (niter),
793 build_int_cst (TREE_TYPE (niter), 1));
794 *number_of_iterations = niter;
795
796 return cond;
797}
798
799/* Function bb_in_loop_p
800
801 Used as predicate for dfs order traversal of the loop bbs. */
802
803static bool
804bb_in_loop_p (const_basic_block bb, const void *data)
805{
806 const class loop *const loop = (const class loop *)data;
807 if (flow_bb_inside_loop_p (loop, bb))
808 return true;
809 return false;
810}
811
812
813/* Create and initialize a new loop_vec_info struct for LOOP_IN, as well as
814 stmt_vec_info structs for all the stmts in LOOP_IN. */
815
816_loop_vec_info::_loop_vec_info (class loop *loop_in, vec_info_shared *shared)
817 : vec_info (vec_info::loop, shared),
818 loop (loop_in),
819 bbs (XCNEWVEC (basic_block, loop->num_nodes)),
820 num_itersm1 (NULL_TREE),
821 num_iters (NULL_TREE),
822 num_iters_unchanged (NULL_TREE),
823 num_iters_assumptions (NULL_TREE),
824 vector_costs (nullptr),
825 scalar_costs (nullptr),
826 th (0),
827 versioning_threshold (0),
828 vectorization_factor (0),
829 main_loop_edge (nullptr),
830 skip_main_loop_edge (nullptr),
831 skip_this_loop_edge (nullptr),
832 reusable_accumulators (),
833 max_vectorization_factor (0),
834 mask_skip_niters (NULL_TREE),
835 rgroup_compare_type (NULL_TREE),
836 simd_if_cond (NULL_TREE),
837 unaligned_dr (NULL),
838 peeling_for_alignment (0),
839 ptr_mask (0),
840 ivexpr_map (NULL),
841 scan_map (NULL),
842 slp_unrolling_factor (1),
843 inner_loop_cost_factor (param_vect_inner_loop_cost_factor),
844 vectorizable (false),
845 can_use_partial_vectors_p (param_vect_partial_vector_usage != 0),
846 using_partial_vectors_p (false),
847 epil_using_partial_vectors_p (false),
848 peeling_for_gaps (false),
849 peeling_for_niter (false),
850 no_data_dependencies (false),
851 has_mask_store (false),
852 scalar_loop_scaling (profile_probability::uninitialized ()),
853 scalar_loop (NULL),
854 orig_loop_info (NULL)
855{
856 /* CHECKME: We want to visit all BBs before their successors (except for
857 latch blocks, for which this assertion wouldn't hold). In the simple
858 case of the loop forms we allow, a dfs order of the BBs would the same
859 as reversed postorder traversal, so we are safe. */
860
861 unsigned int nbbs = dfs_enumerate_from (loop->header, 0, bb_in_loop_p,
862 bbs, loop->num_nodes, loop);
863 gcc_assert (nbbs == loop->num_nodes);
864
865 for (unsigned int i = 0; i < nbbs; i++)
866 {
867 basic_block bb = bbs[i];
868 gimple_stmt_iterator si;
869
870 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
871 {
872 gimple *phi = gsi_stmt (si);
873 gimple_set_uid (phi, 0);
874 add_stmt (phi);
875 }
876
877 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
878 {
879 gimple *stmt = gsi_stmt (si);
880 gimple_set_uid (stmt, 0);
881 if (is_gimple_debug (stmt))
882 continue;
883 add_stmt (stmt);
884 /* If .GOMP_SIMD_LANE call for the current loop has 3 arguments, the
885 third argument is the #pragma omp simd if (x) condition, when 0,
886 loop shouldn't be vectorized, when non-zero constant, it should
887 be vectorized normally, otherwise versioned with vectorized loop
888 done if the condition is non-zero at runtime. */
889 if (loop_in->simduid
890 && is_gimple_call (stmt)
891 && gimple_call_internal_p (stmt)
892 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE
893 && gimple_call_num_args (stmt) >= 3
894 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
895 && (loop_in->simduid
896 == SSA_NAME_VAR (gimple_call_arg (stmt, 0))))
897 {
898 tree arg = gimple_call_arg (stmt, 2);
899 if (integer_zerop (arg) || TREE_CODE (arg) == SSA_NAME)
900 simd_if_cond = arg;
901 else
902 gcc_assert (integer_nonzerop (arg));
903 }
904 }
905 }
906
907 epilogue_vinfos.create (6);
908}
909
910/* Free all levels of rgroup CONTROLS. */
911
912void
913release_vec_loop_controls (vec<rgroup_controls> *controls)
914{
915 rgroup_controls *rgc;
916 unsigned int i;
917 FOR_EACH_VEC_ELT (*controls, i, rgc)
918 rgc->controls.release ();
919 controls->release ();
920}
921
922/* Free all memory used by the _loop_vec_info, as well as all the
923 stmt_vec_info structs of all the stmts in the loop. */
924
925_loop_vec_info::~_loop_vec_info ()
926{
927 free (bbs);
928
929 release_vec_loop_controls (&masks);
930 release_vec_loop_controls (&lens);
931 delete ivexpr_map;
932 delete scan_map;
933 epilogue_vinfos.release ();
934 delete scalar_costs;
935 delete vector_costs;
936
937 /* When we release an epiloge vinfo that we do not intend to use
938 avoid clearing AUX of the main loop which should continue to
939 point to the main loop vinfo since otherwise we'll leak that. */
940 if (loop->aux == this)
941 loop->aux = NULL;
942}
943
944/* Return an invariant or register for EXPR and emit necessary
945 computations in the LOOP_VINFO loop preheader. */
946
947tree
948cse_and_gimplify_to_preheader (loop_vec_info loop_vinfo, tree expr)
949{
950 if (is_gimple_reg (expr)
951 || is_gimple_min_invariant (expr))
952 return expr;
953
954 if (! loop_vinfo->ivexpr_map)
955 loop_vinfo->ivexpr_map = new hash_map<tree_operand_hash, tree>;
956 tree &cached = loop_vinfo->ivexpr_map->get_or_insert (expr);
957 if (! cached)
958 {
959 gimple_seq stmts = NULL;
960 cached = force_gimple_operand (unshare_expr (expr),
961 &stmts, true, NULL_TREE);
962 if (stmts)
963 {
964 edge e = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
965 gsi_insert_seq_on_edge_immediate (e, stmts);
966 }
967 }
968 return cached;
969}
970
971/* Return true if we can use CMP_TYPE as the comparison type to produce
972 all masks required to mask LOOP_VINFO. */
973
974static bool
975can_produce_all_loop_masks_p (loop_vec_info loop_vinfo, tree cmp_type)
976{
977 rgroup_controls *rgm;
978 unsigned int i;
979 FOR_EACH_VEC_ELT (LOOP_VINFO_MASKS (loop_vinfo), i, rgm)
980 if (rgm->type != NULL_TREE
981 && !direct_internal_fn_supported_p (IFN_WHILE_ULT,
982 cmp_type, rgm->type,
983 OPTIMIZE_FOR_SPEED))
984 return false;
985 return true;
986}
987
988/* Calculate the maximum number of scalars per iteration for every
989 rgroup in LOOP_VINFO. */
990
991static unsigned int
992vect_get_max_nscalars_per_iter (loop_vec_info loop_vinfo)
993{
994 unsigned int res = 1;
995 unsigned int i;
996 rgroup_controls *rgm;
997 FOR_EACH_VEC_ELT (LOOP_VINFO_MASKS (loop_vinfo), i, rgm)
998 res = MAX (res, rgm->max_nscalars_per_iter);
999 return res;
1000}
1001
1002/* Calculate the minimum precision necessary to represent:
1003
1004 MAX_NITERS * FACTOR
1005
1006 as an unsigned integer, where MAX_NITERS is the maximum number of
1007 loop header iterations for the original scalar form of LOOP_VINFO. */
1008
1009static unsigned
1010vect_min_prec_for_max_niters (loop_vec_info loop_vinfo, unsigned int factor)
1011{
1012 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1013
1014 /* Get the maximum number of iterations that is representable
1015 in the counter type. */
1016 tree ni_type = TREE_TYPE (LOOP_VINFO_NITERSM1 (loop_vinfo));
1017 widest_int max_ni = wi::to_widest (TYPE_MAX_VALUE (ni_type)) + 1;
1018
1019 /* Get a more refined estimate for the number of iterations. */
1020 widest_int max_back_edges;
1021 if (max_loop_iterations (loop, &max_back_edges))
1022 max_ni = wi::smin (max_ni, max_back_edges + 1);
1023
1024 /* Work out how many bits we need to represent the limit. */
1025 return wi::min_precision (max_ni * factor, UNSIGNED);
1026}
1027
1028/* True if the loop needs peeling or partial vectors when vectorized. */
1029
1030static bool
1031vect_need_peeling_or_partial_vectors_p (loop_vec_info loop_vinfo)
1032{
1033 unsigned HOST_WIDE_INT const_vf;
1034 HOST_WIDE_INT max_niter
1035 = likely_max_stmt_executions_int (LOOP_VINFO_LOOP (loop_vinfo));
1036
1037 unsigned th = LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo);
1038 if (!th && LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo))
1039 th = LOOP_VINFO_COST_MODEL_THRESHOLD (LOOP_VINFO_ORIG_LOOP_INFO
1040 (loop_vinfo));
1041
1042 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
1043 && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) >= 0)
1044 {
1045 /* Work out the (constant) number of iterations that need to be
1046 peeled for reasons other than niters. */
1047 unsigned int peel_niter = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
1048 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
1049 peel_niter += 1;
1050 if (!multiple_p (LOOP_VINFO_INT_NITERS (loop_vinfo) - peel_niter,
1051 LOOP_VINFO_VECT_FACTOR (loop_vinfo)))
1052 return true;
1053 }
1054 else if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)
1055 /* ??? When peeling for gaps but not alignment, we could
1056 try to check whether the (variable) niters is known to be
1057 VF * N + 1. That's something of a niche case though. */
1058 || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
1059 || !LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&const_vf)
1060 || ((tree_ctz (LOOP_VINFO_NITERS (loop_vinfo))
1061 < (unsigned) exact_log2 (const_vf))
1062 /* In case of versioning, check if the maximum number of
1063 iterations is greater than th. If they are identical,
1064 the epilogue is unnecessary. */
1065 && (!LOOP_REQUIRES_VERSIONING (loop_vinfo)
1066 || ((unsigned HOST_WIDE_INT) max_niter
1067 > (th / const_vf) * const_vf))))
1068 return true;
1069
1070 return false;
1071}
1072
1073/* Each statement in LOOP_VINFO can be masked where necessary. Check
1074 whether we can actually generate the masks required. Return true if so,
1075 storing the type of the scalar IV in LOOP_VINFO_RGROUP_COMPARE_TYPE. */
1076
1077static bool
1078vect_verify_full_masking (loop_vec_info loop_vinfo)
1079{
1080 unsigned int min_ni_width;
1081 unsigned int max_nscalars_per_iter
1082 = vect_get_max_nscalars_per_iter (loop_vinfo);
1083
1084 /* Use a normal loop if there are no statements that need masking.
1085 This only happens in rare degenerate cases: it means that the loop
1086 has no loads, no stores, and no live-out values. */
1087 if (LOOP_VINFO_MASKS (loop_vinfo).is_empty ())
1088 return false;
1089
1090 /* Work out how many bits we need to represent the limit. */
1091 min_ni_width
1092 = vect_min_prec_for_max_niters (loop_vinfo, max_nscalars_per_iter);
1093
1094 /* Find a scalar mode for which WHILE_ULT is supported. */
1095 opt_scalar_int_mode cmp_mode_iter;
1096 tree cmp_type = NULL_TREE;
1097 tree iv_type = NULL_TREE;
1098 widest_int iv_limit = vect_iv_limit_for_partial_vectors (loop_vinfo);
1099 unsigned int iv_precision = UINT_MAX;
1100
1101 if (iv_limit != -1)
1102 iv_precision = wi::min_precision (iv_limit * max_nscalars_per_iter,
1103 UNSIGNED);
1104
1105 FOR_EACH_MODE_IN_CLASS (cmp_mode_iter, MODE_INT)
1106 {
1107 unsigned int cmp_bits = GET_MODE_BITSIZE (cmp_mode_iter.require ());
1108 if (cmp_bits >= min_ni_width
1109 && targetm.scalar_mode_supported_p (cmp_mode_iter.require ()))
1110 {
1111 tree this_type = build_nonstandard_integer_type (cmp_bits, true);
1112 if (this_type
1113 && can_produce_all_loop_masks_p (loop_vinfo, this_type))
1114 {
1115 /* Although we could stop as soon as we find a valid mode,
1116 there are at least two reasons why that's not always the
1117 best choice:
1118
1119 - An IV that's Pmode or wider is more likely to be reusable
1120 in address calculations than an IV that's narrower than
1121 Pmode.
1122
1123 - Doing the comparison in IV_PRECISION or wider allows
1124 a natural 0-based IV, whereas using a narrower comparison
1125 type requires mitigations against wrap-around.
1126
1127 Conversely, if the IV limit is variable, doing the comparison
1128 in a wider type than the original type can introduce
1129 unnecessary extensions, so picking the widest valid mode
1130 is not always a good choice either.
1131
1132 Here we prefer the first IV type that's Pmode or wider,
1133 and the first comparison type that's IV_PRECISION or wider.
1134 (The comparison type must be no wider than the IV type,
1135 to avoid extensions in the vector loop.)
1136
1137 ??? We might want to try continuing beyond Pmode for ILP32
1138 targets if CMP_BITS < IV_PRECISION. */
1139 iv_type = this_type;
1140 if (!cmp_type || iv_precision > TYPE_PRECISION (cmp_type)((tree_class_check ((cmp_type), (tcc_type), "/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c"
, 1140, __FUNCTION__))->type_common.precision)
)
1141 cmp_type = this_type;
1142 if (cmp_bits >= GET_MODE_BITSIZE (Pmode(global_options.x_ix86_pmode == PMODE_DI ? (scalar_int_mode (
(scalar_int_mode::from_int) E_DImode)) : (scalar_int_mode ((scalar_int_mode
::from_int) E_SImode)))
))
1143 break;
1144 }
1145 }
1146 }
1147
1148 if (!cmp_type)
1149 return false;
1150
1151 LOOP_VINFO_RGROUP_COMPARE_TYPE (loop_vinfo)(loop_vinfo)->rgroup_compare_type = cmp_type;
1152 LOOP_VINFO_RGROUP_IV_TYPE (loop_vinfo)(loop_vinfo)->rgroup_iv_type = iv_type;
1153 return true;
1154}
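Viewed from a distance, the function above answers two questions: how many bits are needed to count all scalar iterations (min_ni_width), and which supported integer mode of at least that width makes the best comparison/IV type. The width question is simply "smallest precision that holds the limit"; a minimal stand-alone sketch of that idea, using a hypothetical helper rather than the wi::min_precision machinery the real code relies on:

/* Illustrative sketch only: smallest number of bits that can represent
   LIMIT as an unsigned value.  The real code computes this kind of
   quantity with wi::min_precision on widest_int values.  */
static unsigned int
sketch_min_prec (unsigned long long limit)
{
  unsigned int prec = 1;
  while ((limit >> prec) != 0)
    ++prec;
  return prec;
}

/* sketch_min_prec (255) == 8, sketch_min_prec (256) == 9.  */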
1155
1156/* Check whether we can use vector access with length based on precision
1157 comparison. So far, to keep it simple, we only allow the case that the
1158 precision of the target supported length is larger than the precision
1159 required by loop niters. */
1160
1161static bool
1162vect_verify_loop_lens (loop_vec_info loop_vinfo)
1163{
1164 if (LOOP_VINFO_LENS (loop_vinfo)(loop_vinfo)->lens.is_empty ())
1165 return false;
1166
1167 unsigned int max_nitems_per_iter = 1;
1168 unsigned int i;
1169 rgroup_controls *rgl;
1170 /* Find the maximum number of items per iteration for every rgroup. */
1171 FOR_EACH_VEC_ELT (LOOP_VINFO_LENS (loop_vinfo), i, rgl)for (i = 0; ((loop_vinfo)->lens).iterate ((i), &(rgl))
; ++(i))
1172 {
1173 unsigned nitems_per_iter = rgl->max_nscalars_per_iter * rgl->factor;
1174 max_nitems_per_iter = MAX (max_nitems_per_iter, nitems_per_iter)((max_nitems_per_iter) > (nitems_per_iter) ? (max_nitems_per_iter
) : (nitems_per_iter))
;
1175 }
1176
1177 /* Work out how many bits we need to represent the length limit. */
1178 unsigned int min_ni_prec
1179 = vect_min_prec_for_max_niters (loop_vinfo, max_nitems_per_iter);
1180
1181 /* Now use the maximum of the precisions below for one suitable IV type:
1182 - the IV's natural precision
1183 - the precision needed to hold: the maximum number of scalar
1184 iterations multiplied by the scale factor (min_ni_prec above)
1185 - the Pmode precision
1186
1187 If min_ni_prec is less than the precision of the current niters,
1188 we prefer to still use the niters type. Prefer to use Pmode and
1189 wider IV to avoid narrow conversions. */
1190
1191 unsigned int ni_prec
1192 = TYPE_PRECISION (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)))((tree_class_check ((((contains_struct_check (((loop_vinfo)->
num_iters), (TS_TYPED), "/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c"
, 1192, __FUNCTION__))->typed.type)), (tcc_type), "/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c"
, 1192, __FUNCTION__))->type_common.precision)
;
1193 min_ni_prec = MAX (min_ni_prec, ni_prec)((min_ni_prec) > (ni_prec) ? (min_ni_prec) : (ni_prec));
1194 min_ni_prec = MAX (min_ni_prec, GET_MODE_BITSIZE (Pmode))((min_ni_prec) > (GET_MODE_BITSIZE ((global_options.x_ix86_pmode
== PMODE_DI ? (scalar_int_mode ((scalar_int_mode::from_int) E_DImode
)) : (scalar_int_mode ((scalar_int_mode::from_int) E_SImode))
))) ? (min_ni_prec) : (GET_MODE_BITSIZE ((global_options.x_ix86_pmode
== PMODE_DI ? (scalar_int_mode ((scalar_int_mode::from_int) E_DImode
)) : (scalar_int_mode ((scalar_int_mode::from_int) E_SImode))
))))
;
1195
1196 tree iv_type = NULL_TREE(tree) nullptr;
1197 opt_scalar_int_mode tmode_iter;
1198 FOR_EACH_MODE_IN_CLASS (tmode_iter, MODE_INT)for (mode_iterator::start (&(tmode_iter), MODE_INT); mode_iterator
::iterate_p (&(tmode_iter)); mode_iterator::get_wider (&
(tmode_iter)))
1199 {
1200 scalar_mode tmode = tmode_iter.require ();
1201 unsigned int tbits = GET_MODE_BITSIZE (tmode);
1202
1203 /* ??? Do we really want to construct one IV whose precision exceeds
1204 BITS_PER_WORD? */
1205 if (tbits > BITS_PER_WORD((8) * (((global_options.x_ix86_isa_flags & (1UL <<
1)) != 0) ? 8 : 4))
)
1206 break;
1207
1208 /* Find the first available standard integral type. */
1209 if (tbits >= min_ni_prec && targetm.scalar_mode_supported_p (tmode))
1210 {
1211 iv_type = build_nonstandard_integer_type (tbits, true);
1212 break;
1213 }
1214 }
1215
1216 if (!iv_type)
1217 {
1218 if (dump_enabled_p ())
1219 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1220 "can't vectorize with length-based partial vectors"
1221 " because there is no suitable iv type.\n");
1222 return false;
1223 }
1224
1225 LOOP_VINFO_RGROUP_COMPARE_TYPE (loop_vinfo)(loop_vinfo)->rgroup_compare_type = iv_type;
1226 LOOP_VINFO_RGROUP_IV_TYPE (loop_vinfo)(loop_vinfo)->rgroup_iv_type = iv_type;
1227
1228 return true;
1229}
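As a concrete (made-up) instance of the two MAX clamps above: if the rgroups need 17 bits for the scaled iteration count, the niters type is 32 bits wide and Pmode is 64 bits, the chosen precision is 64, and the first supported integer mode of at least that width provides the IV type. A sketch with those hypothetical numbers:

/* Hypothetical values only, mirroring the clamping above.  */
static unsigned int
sketch_pick_len_iv_prec (unsigned int min_ni_prec /* e.g. 17 */,
                         unsigned int ni_prec     /* e.g. 32 */,
                         unsigned int pmode_prec  /* e.g. 64 */)
{
  if (min_ni_prec < ni_prec)
    min_ni_prec = ni_prec;
  if (min_ni_prec < pmode_prec)
    min_ni_prec = pmode_prec;
  return min_ni_prec;  /* 64 for the example values.  */
}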
1230
1231/* Calculate the cost of one scalar iteration of the loop. */
1232static void
1233vect_compute_single_scalar_iteration_cost (loop_vec_info loop_vinfo)
1234{
1235 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo)(loop_vinfo)->loop;
1236 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo)(loop_vinfo)->bbs;
1237 int nbbs = loop->num_nodes, factor;
1238 int innerloop_iters, i;
1239
1240 DUMP_VECT_SCOPE ("vect_compute_single_scalar_iteration_cost")auto_dump_scope scope ("vect_compute_single_scalar_iteration_cost"
, vect_location)
;
1241
1242 /* Gather costs for statements in the scalar loop. */
1243
1244 /* FORNOW. */
1245 innerloop_iters = 1;
1246 if (loop->inner)
1247 innerloop_iters = LOOP_VINFO_INNER_LOOP_COST_FACTOR (loop_vinfo)(loop_vinfo)->inner_loop_cost_factor;
1248
1249 for (i = 0; i < nbbs; i++)
1250 {
1251 gimple_stmt_iterator si;
1252 basic_block bb = bbs[i];
1253
1254 if (bb->loop_father == loop->inner)
1255 factor = innerloop_iters;
1256 else
1257 factor = 1;
1258
1259 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1260 {
1261 gimple *stmt = gsi_stmt (si);
1262 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (stmt);
1263
1264 if (!is_gimple_assign (stmt) && !is_gimple_call (stmt))
1265 continue;
1266
1267 /* Skip stmts that are not vectorized inside the loop. */
1268 stmt_vec_info vstmt_info = vect_stmt_to_vectorize (stmt_info);
1269 if (!STMT_VINFO_RELEVANT_P (vstmt_info)((vstmt_info)->relevant != vect_unused_in_scope)
1270 && (!STMT_VINFO_LIVE_P (vstmt_info)(vstmt_info)->live
1271 || !VECTORIZABLE_CYCLE_DEF((((vstmt_info)->def_type) == vect_reduction_def) || (((vstmt_info
)->def_type) == vect_double_reduction_def) || (((vstmt_info
)->def_type) == vect_nested_cycle))
1272 (STMT_VINFO_DEF_TYPE (vstmt_info))((((vstmt_info)->def_type) == vect_reduction_def) || (((vstmt_info
)->def_type) == vect_double_reduction_def) || (((vstmt_info
)->def_type) == vect_nested_cycle))
))
1273 continue;
1274
1275 vect_cost_for_stmt kind;
1276 if (STMT_VINFO_DATA_REF (stmt_info)((stmt_info)->dr_aux.dr + 0))
1277 {
1278 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info))(((stmt_info)->dr_aux.dr + 0))->is_read)
1279 kind = scalar_load;
1280 else
1281 kind = scalar_store;
1282 }
1283 else if (vect_nop_conversion_p (stmt_info))
1284 continue;
1285 else
1286 kind = scalar_stmt;
1287
1288 /* We are using vect_prologue here to avoid scaling twice
1289 by the inner loop factor. */
1290 record_stmt_cost (&LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo)(loop_vinfo)->scalar_cost_vec,
1291 factor, kind, stmt_info, 0, vect_prologue);
1292 }
1293 }
1294
1295 /* Now accumulate cost. */
1296 loop_vinfo->scalar_costs = init_cost (loop_vinfo, true);
1297 stmt_info_for_cost *si;
1298 int j;
1299 FOR_EACH_VEC_ELT (LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo),for (j = 0; ((loop_vinfo)->scalar_cost_vec).iterate ((j), &
(si)); ++(j))
1300 j, si)for (j = 0; ((loop_vinfo)->scalar_cost_vec).iterate ((j), &
(si)); ++(j))
1301 (void) add_stmt_cost (loop_vinfo->scalar_costs, si->count,
1302 si->kind, si->stmt_info, si->vectype,
1303 si->misalign, si->where);
1304 loop_vinfo->scalar_costs->finish_cost (nullptr);
1305}
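Stripped of the vectorizer bookkeeping, the accounting above boils down to: each scalar statement contributes its per-kind cost once, except statements inside the inner loop, whose count is pre-scaled by the inner-loop cost factor so that the sum represents one iteration of the outer loop. A self-contained sketch of that accumulation (hypothetical types, not the GCC cost hooks):

#include <vector>

struct sketch_cost_entry
{
  int count;           /* 1, or the inner-loop cost factor.  */
  int cost_per_stmt;   /* whatever the target charges for this kind.  */
};

/* Sum the cost of one scalar iteration the same way the loop above
   records it: COUNT already carries the inner-loop scaling.  */
static int
sketch_scalar_iteration_cost (const std::vector<sketch_cost_entry> &stmts)
{
  int total = 0;
  for (const sketch_cost_entry &e : stmts)
    total += e.count * e.cost_per_stmt;
  return total;
}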
1306
1307
1308/* Function vect_analyze_loop_form.
1309
1310 Verify that certain CFG restrictions hold, including:
1311 - the loop has a pre-header
1312 - the loop has a single entry and exit
1313 - the loop exit condition is simple enough
1314 - the number of iterations can be analyzed, i.e., a countable loop. The
1315 niter could be analyzed under some assumptions. */
1316
1317opt_result
1318vect_analyze_loop_form (class loop *loop, vect_loop_form_info *info)
1319{
1320 DUMP_VECT_SCOPE ("vect_analyze_loop_form")auto_dump_scope scope ("vect_analyze_loop_form", vect_location
)
;
1321
1322 /* Different restrictions apply when we are considering an inner-most loop,
1323 vs. an outer (nested) loop.
1324 (FORNOW. May want to relax some of these restrictions in the future). */
1325
1326 info->inner_loop_cond = NULLnullptr;
1327 if (!loop->inner)
1328 {
1329 /* Inner-most loop. We currently require that the number of BBs is
1330 exactly 2 (the header and latch). Vectorizable inner-most loops
1331 look like this:
1332
1333 (pre-header)
1334 |
1335 header <--------+
1336 | | |
1337 | +--> latch --+
1338 |
1339 (exit-bb) */
1340
1341 if (loop->num_nodes != 2)
1342 return opt_result::failure_at (vect_location,
1343 "not vectorized:"
1344 " control flow in loop.\n");
1345
1346 if (empty_block_p (loop->header))
1347 return opt_result::failure_at (vect_location,
1348 "not vectorized: empty loop.\n");
1349 }
1350 else
1351 {
1352 class loop *innerloop = loop->inner;
1353 edge entryedge;
1354
1355 /* Nested loop. We currently require that the loop is doubly-nested,
1356 contains a single inner loop, and the number of BBs is exactly 5.
1357 Vectorizable outer-loops look like this:
1358
1359 (pre-header)
1360 |
1361 header <---+
1362 | |
1363 inner-loop |
1364 | |
1365 tail ------+
1366 |
1367 (exit-bb)
1368
1369 The inner-loop has the properties expected of inner-most loops
1370 as described above. */
1371
1372 if ((loop->inner)->inner || (loop->inner)->next)
1373 return opt_result::failure_at (vect_location,
1374 "not vectorized:"
1375 " multiple nested loops.\n");
1376
1377 if (loop->num_nodes != 5)
1378 return opt_result::failure_at (vect_location,
1379 "not vectorized:"
1380 " control flow in loop.\n");
1381
1382 entryedge = loop_preheader_edge (innerloop);
1383 if (entryedge->src != loop->header
1384 || !single_exit (innerloop)
1385 || single_exit (innerloop)->dest != EDGE_PRED (loop->latch, 0)(*(loop->latch)->preds)[(0)]->src)
1386 return opt_result::failure_at (vect_location,
1387 "not vectorized:"
1388 " unsupported outerloop form.\n");
1389
1390 /* Analyze the inner-loop. */
1391 vect_loop_form_info inner;
1392 opt_result res = vect_analyze_loop_form (loop->inner, &inner);
1393 if (!res)
1394 {
1395 if (dump_enabled_p ())
1396 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1397 "not vectorized: Bad inner loop.\n");
1398 return res;
1399 }
1400
1401 /* Don't support analyzing niter under assumptions for inner
1402 loop. */
1403 if (!integer_onep (inner.assumptions))
1404 return opt_result::failure_at (vect_location,
1405 "not vectorized: Bad inner loop.\n");
1406
1407 if (!expr_invariant_in_loop_p (loop, inner.number_of_iterations))
1408 return opt_result::failure_at (vect_location,
1409 "not vectorized: inner-loop count not"
1410 " invariant.\n");
1411
1412 if (dump_enabled_p ())
1413 dump_printf_loc (MSG_NOTE, vect_location,
1414 "Considering outer-loop vectorization.\n");
1415 info->inner_loop_cond = inner.loop_cond;
1416 }
1417
1418 if (!single_exit (loop))
1419 return opt_result::failure_at (vect_location,
1420 "not vectorized: multiple exits.\n");
1421 if (EDGE_COUNT (loop->header->preds)vec_safe_length (loop->header->preds) != 2)
1422 return opt_result::failure_at (vect_location,
1423 "not vectorized:"
1424 " too many incoming edges.\n");
1425
1426 /* We assume that the loop exit condition is at the end of the loop, i.e.,
1427 that the loop is represented as a do-while (with a proper if-guard
1428 before the loop if needed), where the loop header contains all the
1429 executable statements, and the latch is empty. */
1430 if (!empty_block_p (loop->latch)
1431 || !gimple_seq_empty_p (phi_nodes (loop->latch)))
1432 return opt_result::failure_at (vect_location,
1433 "not vectorized: latch block not empty.\n");
1434
1435 /* Make sure the exit is not abnormal. */
1436 edge e = single_exit (loop);
1437 if (e->flags & EDGE_ABNORMAL)
1438 return opt_result::failure_at (vect_location,
1439 "not vectorized:"
1440 " abnormal loop exit edge.\n");
1441
1442 info->loop_cond
1443 = vect_get_loop_niters (loop, &info->assumptions,
1444 &info->number_of_iterations,
1445 &info->number_of_iterationsm1);
1446 if (!info->loop_cond)
1447 return opt_result::failure_at
1448 (vect_location,
1449 "not vectorized: complicated exit condition.\n");
1450
1451 if (integer_zerop (info->assumptions)
1452 || !info->number_of_iterations
1453 || chrec_contains_undetermined (info->number_of_iterations))
1454 return opt_result::failure_at
1455 (info->loop_cond,
1456 "not vectorized: number of iterations cannot be computed.\n");
1457
1458 if (integer_zerop (info->number_of_iterations))
1459 return opt_result::failure_at
1460 (info->loop_cond,
1461 "not vectorized: number of iterations = 0.\n");
1462
1463 if (!(tree_fits_shwi_p (info->number_of_iterations)
1464 && tree_to_shwi (info->number_of_iterations) > 0))
1465 {
1466 if (dump_enabled_p ())
1467 {
1468 dump_printf_loc (MSG_NOTE, vect_location,
1469 "Symbolic number of iterations is ");
1470 dump_generic_expr (MSG_NOTE, TDF_DETAILS, info->number_of_iterations);
1471 dump_printf (MSG_NOTE, "\n");
1472 }
1473 }
1474
1475 return opt_result::success ();
1476}
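For reference, a loop that satisfies all of the checks above can be as plain as the following (an illustrative example, not taken from the sources): it has a pre-header, a single entry and exit, an empty latch once gimplified into do-while form, and a trip count the niter analysis can compute from N.

void
saxpy (int n, float a, const float *x, float *y)
{
  for (int i = 0; i < n; i++)
    y[i] = a * x[i] + y[i];
}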
1477
1478/* Create a loop_vec_info for LOOP with SHARED and the
1479 vect_analyze_loop_form result. */
1480
1481loop_vec_info
1482vect_create_loop_vinfo (class loop *loop, vec_info_shared *shared,
1483 const vect_loop_form_info *info,
1484 loop_vec_info main_loop_info)
1485{
1486 loop_vec_info loop_vinfo = new _loop_vec_info (loop, shared);
1487 LOOP_VINFO_NITERSM1 (loop_vinfo)(loop_vinfo)->num_itersm1 = info->number_of_iterationsm1;
1488 LOOP_VINFO_NITERS (loop_vinfo)(loop_vinfo)->num_iters = info->number_of_iterations;
1489 LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo)(loop_vinfo)->num_iters_unchanged = info->number_of_iterations;
1490 LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo)(loop_vinfo)->orig_loop_info = main_loop_info;
1491 /* Also record the assumptions for versioning. */
1492 if (!integer_onep (info->assumptions) && !main_loop_info)
1493 LOOP_VINFO_NITERS_ASSUMPTIONS (loop_vinfo)(loop_vinfo)->num_iters_assumptions = info->assumptions;
1494
1495 stmt_vec_info loop_cond_info = loop_vinfo->lookup_stmt (info->loop_cond);
1496 STMT_VINFO_TYPE (loop_cond_info)(loop_cond_info)->type = loop_exit_ctrl_vec_info_type;
1497 if (info->inner_loop_cond)
1498 {
1499 stmt_vec_info inner_loop_cond_info
1500 = loop_vinfo->lookup_stmt (info->inner_loop_cond);
1501 STMT_VINFO_TYPE (inner_loop_cond_info)(inner_loop_cond_info)->type = loop_exit_ctrl_vec_info_type;
1502 /* If we have an estimate on the number of iterations of the inner
1503 loop use that to limit the scale for costing, otherwise use
1504 --param vect-inner-loop-cost-factor literally. */
1505 widest_int nit;
1506 if (estimated_stmt_executions (loop->inner, &nit))
1507 LOOP_VINFO_INNER_LOOP_COST_FACTOR (loop_vinfo)(loop_vinfo)->inner_loop_cost_factor
1508 = wi::smin (nit, param_vect_inner_loop_cost_factorglobal_options.x_param_vect_inner_loop_cost_factor).to_uhwi ();
1509 }
1510
1511 return loop_vinfo;
1512}
1513
1514
1515
1516/* Scan the loop stmts and, depending on whether there are any (non-)SLP
1517 statements, update the vectorization factor. */
1518
1519static void
1520vect_update_vf_for_slp (loop_vec_info loop_vinfo)
1521{
1522 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo)(loop_vinfo)->loop;
1523 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo)(loop_vinfo)->bbs;
1524 int nbbs = loop->num_nodes;
1525 poly_uint64 vectorization_factor;
1526 int i;
1527
1528 DUMP_VECT_SCOPE ("vect_update_vf_for_slp")auto_dump_scope scope ("vect_update_vf_for_slp", vect_location
)
;
1529
1530 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo)(loop_vinfo)->vectorization_factor;
1531 gcc_assert (known_ne (vectorization_factor, 0U))((void)(!((!maybe_eq (vectorization_factor, 0U))) ? fancy_abort
("/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c"
, 1531, __FUNCTION__), 0 : 0))
;
1532
1533 /* If all the stmts in the loop can be SLPed, we perform only SLP, and
1534 vectorization factor of the loop is the unrolling factor required by
1535 the SLP instances. If that unrolling factor is 1, we say that we
1536 perform pure SLP on the loop - cross-iteration parallelism is not
1537 exploited. */
1538 bool only_slp_in_loop = true;
1539 for (i = 0; i < nbbs; i++)
1540 {
1541 basic_block bb = bbs[i];
1542 for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si);
1543 gsi_next (&si))
1544 {
1545 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (si.phi ());
1546 if (!stmt_info)
1547 continue;
1548 if ((STMT_VINFO_RELEVANT_P (stmt_info)((stmt_info)->relevant != vect_unused_in_scope)
1549 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info))((((stmt_info)->def_type) == vect_reduction_def) || (((stmt_info
)->def_type) == vect_double_reduction_def) || (((stmt_info
)->def_type) == vect_nested_cycle))
)
1550 && !PURE_SLP_STMT (stmt_info)((stmt_info)->slp_type == pure_slp))
1551 /* STMT needs both SLP and loop-based vectorization. */
1552 only_slp_in_loop = false;
1553 }
1554 for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si);
1555 gsi_next (&si))
1556 {
1557 if (is_gimple_debug (gsi_stmt (si)))
1558 continue;
1559 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
1560 stmt_info = vect_stmt_to_vectorize (stmt_info);
1561 if ((STMT_VINFO_RELEVANT_P (stmt_info)((stmt_info)->relevant != vect_unused_in_scope)
1562 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info))((((stmt_info)->def_type) == vect_reduction_def) || (((stmt_info
)->def_type) == vect_double_reduction_def) || (((stmt_info
)->def_type) == vect_nested_cycle))
)
1563 && !PURE_SLP_STMT (stmt_info)((stmt_info)->slp_type == pure_slp))
1564 /* STMT needs both SLP and loop-based vectorization. */
1565 only_slp_in_loop = false;
1566 }
1567 }
1568
1569 if (only_slp_in_loop)
1570 {
1571 if (dump_enabled_p ())
1572 dump_printf_loc (MSG_NOTE, vect_location,
1573 "Loop contains only SLP stmts\n");
1574 vectorization_factor = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo)(loop_vinfo)->slp_unrolling_factor;
1575 }
1576 else
1577 {
1578 if (dump_enabled_p ())
1579 dump_printf_loc (MSG_NOTE, vect_location,
1580 "Loop contains SLP and non-SLP stmts\n");
1581 /* Both the vectorization factor and unroll factor have the form
1582 GET_MODE_SIZE (loop_vinfo->vector_mode) * X for some rational X,
1583 so they must have a common multiple. */
1584 vectorization_factor
1585 = force_common_multiple (vectorization_factor,
1586 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo)(loop_vinfo)->slp_unrolling_factor);
1587 }
1588
1589 LOOP_VINFO_VECT_FACTOR (loop_vinfo)(loop_vinfo)->vectorization_factor = vectorization_factor;
1590 if (dump_enabled_p ())
1591 {
1592 dump_printf_loc (MSG_NOTE, vect_location,
1593 "Updating vectorization factor to ");
1594 dump_dec (MSG_NOTE, vectorization_factor);
1595 dump_printf (MSG_NOTE, ".\n");
1596 }
1597}
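The force_common_multiple call is easiest to see with plain integers (the real operands are poly_uint64): a loop-based vectorization factor of 4 combined with an SLP unrolling factor of 6 yields 12. A minimal sketch for the constant case, where a common multiple is just the least common multiple:

/* Least common multiple of two non-zero constants; force_common_multiple
   performs the equivalent computation on poly_uint64 values.  */
static unsigned int
sketch_common_multiple (unsigned int a, unsigned int b)
{
  unsigned int x = a, y = b;
  while (y != 0)
    {
      unsigned int t = x % y;
      x = y;
      y = t;
    }
  return a / x * b;
}

/* sketch_common_multiple (4, 6) == 12.  */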
1598
1599/* Return true if STMT_INFO describes a double reduction phi and if
1600 the other phi in the reduction is also relevant for vectorization.
1601 This rejects cases such as:
1602
1603 outer1:
1604 x_1 = PHI <x_3(outer2), ...>;
1605 ...
1606
1607 inner:
1608 x_2 = ...;
1609 ...
1610
1611 outer2:
1612 x_3 = PHI <x_2(inner)>;
1613
1614 if nothing in x_2 or elsewhere makes x_1 relevant. */
1615
1616static bool
1617vect_active_double_reduction_p (stmt_vec_info stmt_info)
1618{
1619 if (STMT_VINFO_DEF_TYPE (stmt_info)(stmt_info)->def_type != vect_double_reduction_def)
1620 return false;
1621
1622 return STMT_VINFO_RELEVANT_P (STMT_VINFO_REDUC_DEF (stmt_info))(((stmt_info)->reduc_def)->relevant != vect_unused_in_scope
)
;
1623}
1624
1625/* Function vect_analyze_loop_operations.
1626
1627 Scan the loop stmts and make sure they are all vectorizable. */
1628
1629static opt_result
1630vect_analyze_loop_operations (loop_vec_info loop_vinfo)
1631{
1632 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo)(loop_vinfo)->loop;
1633 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo)(loop_vinfo)->bbs;
1634 int nbbs = loop->num_nodes;
1635 int i;
1636 stmt_vec_info stmt_info;
1637 bool need_to_vectorize = false;
1638 bool ok;
1639
1640 DUMP_VECT_SCOPE ("vect_analyze_loop_operations")auto_dump_scope scope ("vect_analyze_loop_operations", vect_location
)
;
1641
1642 auto_vec<stmt_info_for_cost> cost_vec;
1643
1644 for (i = 0; i < nbbs; i++)
1645 {
1646 basic_block bb = bbs[i];
1647
1648 for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si);
1649 gsi_next (&si))
1650 {
1651 gphi *phi = si.phi ();
1652 ok = true;
1653
1654 stmt_info = loop_vinfo->lookup_stmt (phi);
1655 if (dump_enabled_p ())
1656 dump_printf_loc (MSG_NOTE, vect_location, "examining phi: %G", phi);
1657 if (virtual_operand_p (gimple_phi_result (phi)))
1658 continue;
1659
1660 /* Inner-loop loop-closed exit phi in outer-loop vectorization
1661 (i.e., a phi in the tail of the outer-loop). */
1662 if (! is_loop_header_bb_p (bb))
1663 {
1664 /* FORNOW: we currently don't support the case that these phis
1665 are not used in the outerloop (unless it is double reduction,
1666 i.e., this phi is vect_reduction_def), because this case
1667 requires us to actually do something here. */
1668 if (STMT_VINFO_LIVE_P (stmt_info)(stmt_info)->live
1669 && !vect_active_double_reduction_p (stmt_info))
1670 return opt_result::failure_at (phi,
1671 "Unsupported loop-closed phi"
1672 " in outer-loop.\n");
1673
1674 /* If PHI is used in the outer loop, we check that its operand
1675 is defined in the inner loop. */
1676 if (STMT_VINFO_RELEVANT_P (stmt_info)((stmt_info)->relevant != vect_unused_in_scope))
1677 {
1678 tree phi_op;
1679
1680 if (gimple_phi_num_args (phi) != 1)
1681 return opt_result::failure_at (phi, "unsupported phi");
1682
1683 phi_op = PHI_ARG_DEF (phi, 0)gimple_phi_arg_def ((phi), (0));
1684 stmt_vec_info op_def_info = loop_vinfo->lookup_def (phi_op);
1685 if (!op_def_info)
1686 return opt_result::failure_at (phi, "unsupported phi\n");
1687
1688 if (STMT_VINFO_RELEVANT (op_def_info)(op_def_info)->relevant != vect_used_in_outer
1689 && (STMT_VINFO_RELEVANT (op_def_info)(op_def_info)->relevant
1690 != vect_used_in_outer_by_reduction))
1691 return opt_result::failure_at (phi, "unsupported phi\n");
1692
1693 if ((STMT_VINFO_DEF_TYPE (stmt_info)(stmt_info)->def_type == vect_internal_def
1694 || (STMT_VINFO_DEF_TYPE (stmt_info)(stmt_info)->def_type
1695 == vect_double_reduction_def))
1696 && !vectorizable_lc_phi (loop_vinfo,
1697 stmt_info, NULLnullptr, NULLnullptr))
1698 return opt_result::failure_at (phi, "unsupported phi\n");
1699 }
1700
1701 continue;
1702 }
1703
1704 gcc_assert (stmt_info)((void)(!(stmt_info) ? fancy_abort ("/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c"
, 1704, __FUNCTION__), 0 : 0))
;
1705
1706 if ((STMT_VINFO_RELEVANT (stmt_info)(stmt_info)->relevant == vect_used_in_scope
1707 || STMT_VINFO_LIVE_P (stmt_info)(stmt_info)->live)
1708 && STMT_VINFO_DEF_TYPE (stmt_info)(stmt_info)->def_type != vect_induction_def)
1709 /* A scalar-dependence cycle that we don't support. */
1710 return opt_result::failure_at (phi,
1711 "not vectorized:"
1712 " scalar dependence cycle.\n");
1713
1714 if (STMT_VINFO_RELEVANT_P (stmt_info)((stmt_info)->relevant != vect_unused_in_scope))
1715 {
1716 need_to_vectorize = true;
1717 if (STMT_VINFO_DEF_TYPE (stmt_info)(stmt_info)->def_type == vect_induction_def
1718 && ! PURE_SLP_STMT (stmt_info)((stmt_info)->slp_type == pure_slp))
1719 ok = vectorizable_induction (loop_vinfo,
1720 stmt_info, NULLnullptr, NULLnullptr,
1721 &cost_vec);
1722 else if ((STMT_VINFO_DEF_TYPE (stmt_info)(stmt_info)->def_type == vect_reduction_def
1723 || (STMT_VINFO_DEF_TYPE (stmt_info)(stmt_info)->def_type
1724 == vect_double_reduction_def)
1725 || STMT_VINFO_DEF_TYPE (stmt_info)(stmt_info)->def_type == vect_nested_cycle)
1726 && ! PURE_SLP_STMT (stmt_info)((stmt_info)->slp_type == pure_slp))
1727 ok = vectorizable_reduction (loop_vinfo,
1728 stmt_info, NULLnullptr, NULLnullptr, &cost_vec);
1729 }
1730
1731 /* SLP PHIs are tested by vect_slp_analyze_node_operations. */
1732 if (ok
1733 && STMT_VINFO_LIVE_P (stmt_info)(stmt_info)->live
1734 && !PURE_SLP_STMT (stmt_info)((stmt_info)->slp_type == pure_slp))
1735 ok = vectorizable_live_operation (loop_vinfo,
1736 stmt_info, NULLnullptr, NULLnullptr, NULLnullptr,
1737 -1, false, &cost_vec);
1738
1739 if (!ok)
1740 return opt_result::failure_at (phi,
1741 "not vectorized: relevant phi not "
1742 "supported: %G",
1743 static_cast <gimple *> (phi));
1744 }
1745
1746 for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si);
1747 gsi_next (&si))
1748 {
1749 gimple *stmt = gsi_stmt (si);
1750 if (!gimple_clobber_p (stmt)
1751 && !is_gimple_debug (stmt))
1752 {
1753 opt_result res
1754 = vect_analyze_stmt (loop_vinfo,
1755 loop_vinfo->lookup_stmt (stmt),
1756 &need_to_vectorize,
1757 NULLnullptr, NULLnullptr, &cost_vec);
1758 if (!res)
1759 return res;
1760 }
1761 }
1762 } /* bbs */
1763
1764 add_stmt_costs (loop_vinfo->vector_costs, &cost_vec);
1765
1766 /* All operations in the loop are either irrelevant (deal with loop
1767 control, or dead), or only used outside the loop and can be moved
1768 out of the loop (e.g. invariants, inductions). The loop can be
1769 optimized away by scalar optimizations. We're better off not
1770 touching this loop. */
1771 if (!need_to_vectorize)
1772 {
1773 if (dump_enabled_p ())
1774 dump_printf_loc (MSG_NOTE, vect_location,
1775 "All the computation can be taken out of the loop.\n");
1776 return opt_result::failure_at
1777 (vect_location,
1778 "not vectorized: redundant loop. no profit to vectorize.\n");
1779 }
1780
1781 return opt_result::success ();
1782}
1783
1784/* Return true if we know that the iteration count is smaller than the
1785 vectorization factor. Return false if it isn't, or if we can't be sure
1786 either way. */
1787
1788static bool
1789vect_known_niters_smaller_than_vf (loop_vec_info loop_vinfo)
1790{
1791 unsigned int assumed_vf = vect_vf_for_cost (loop_vinfo);
1792
1793 HOST_WIDE_INTlong max_niter;
1794 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)(tree_fits_shwi_p ((loop_vinfo)->num_iters) && tree_to_shwi
((loop_vinfo)->num_iters) > 0)
)
1795 max_niter = LOOP_VINFO_INT_NITERS (loop_vinfo)(((unsigned long) (*tree_int_cst_elt_check (((loop_vinfo)->
num_iters), (0), "/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c"
, 1795, __FUNCTION__))))
;
1796 else
1797 max_niter = max_stmt_executions_int (LOOP_VINFO_LOOP (loop_vinfo)(loop_vinfo)->loop);
1798
1799 if (max_niter != -1 && (unsigned HOST_WIDE_INTlong) max_niter < assumed_vf)
1800 return true;
1801
1802 return false;
1803}
1804
1805/* Analyze the cost of the loop described by LOOP_VINFO. Decide if it
1806 is worthwhile to vectorize. Return 1 if definitely yes, 0 if
1807 definitely no, or -1 if it's worth retrying. */
1808
1809static int
1810vect_analyze_loop_costing (loop_vec_info loop_vinfo)
1811{
1812 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo)(loop_vinfo)->loop;
1813 unsigned int assumed_vf = vect_vf_for_cost (loop_vinfo);
1814
1815 /* Only loops that can handle partially-populated vectors can have iteration
1816 counts less than the vectorization factor. */
1817 if (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)(loop_vinfo)->using_partial_vectors_p)
1818 {
1819 if (vect_known_niters_smaller_than_vf (loop_vinfo))
1820 {
1821 if (dump_enabled_p ())
1822 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1823 "not vectorized: iteration count smaller than "
1824 "vectorization factor.\n");
1825 return 0;
1826 }
1827 }
1828
1829 /* If using the "very cheap" model, reject cases in which we'd keep
1830 a copy of the scalar code (even if we might be able to vectorize it). */
1831 if (loop_cost_model (loop) == VECT_COST_MODEL_VERY_CHEAP
1832 && (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)(loop_vinfo)->peeling_for_alignment
1833 || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)(loop_vinfo)->peeling_for_gaps
1834 || LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo)(loop_vinfo)->peeling_for_niter))
1835 {
1836 if (dump_enabled_p ())
1837 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1838 "some scalar iterations would need to be peeled\n");
1839 return 0;
1840 }
1841
1842 int min_profitable_iters, min_profitable_estimate;
1843 vect_estimate_min_profitable_iters (loop_vinfo, &min_profitable_iters,
1844 &min_profitable_estimate);
1845
1846 if (min_profitable_iters < 0)
1847 {
1848 if (dump_enabled_p ())
1849 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1850 "not vectorized: vectorization not profitable.\n");
1851 if (dump_enabled_p ())
1852 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1853 "not vectorized: vector version will never be "
1854 "profitable.\n");
1855 return -1;
1856 }
1857
1858 int min_scalar_loop_bound = (param_min_vect_loop_boundglobal_options.x_param_min_vect_loop_bound
1859 * assumed_vf);
1860
1861 /* Use the cost model only if it is more conservative than the user-specified
1862 threshold. */
1863 unsigned int th = (unsigned) MAX (min_scalar_loop_bound,((min_scalar_loop_bound) > (min_profitable_iters) ? (min_scalar_loop_bound
) : (min_profitable_iters))
1864 min_profitable_iters)((min_scalar_loop_bound) > (min_profitable_iters) ? (min_scalar_loop_bound
) : (min_profitable_iters))
;
1865
1866 LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo)(loop_vinfo)->th = th;
1867
1868 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)(tree_fits_shwi_p ((loop_vinfo)->num_iters) && tree_to_shwi
((loop_vinfo)->num_iters) > 0)
1869 && LOOP_VINFO_INT_NITERS (loop_vinfo)(((unsigned long) (*tree_int_cst_elt_check (((loop_vinfo)->
num_iters), (0), "/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c"
, 1869, __FUNCTION__))))
< th)
1870 {
1871 if (dump_enabled_p ())
1872 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1873 "not vectorized: vectorization not profitable.\n");
1874 if (dump_enabled_p ())
1875 dump_printf_loc (MSG_NOTE, vect_location,
1876 "not vectorized: iteration count smaller than user "
1877 "specified loop bound parameter or minimum profitable "
1878 "iterations (whichever is more conservative).\n");
1879 return 0;
1880 }
1881
1882 /* The static profitability threshold min_profitable_estimate includes
1883 the cost of having to check at runtime whether the scalar loop
1884 should be used instead. If it turns out that we don't need or want
1885 such a check, the threshold we should use for the static estimate
1886 is simply the point at which the vector loop becomes more profitable
1887 than the scalar loop. */
1888 if (min_profitable_estimate > min_profitable_iters
1889 && !LOOP_REQUIRES_VERSIONING (loop_vinfo)(((loop_vinfo)->may_misalign_stmts.length () > 0) || ((
loop_vinfo)->comp_alias_ddrs.length () > 0 || (loop_vinfo
)->check_unequal_addrs.length () > 0 || (loop_vinfo)->
lower_bounds.length () > 0) || ((loop_vinfo)->num_iters_assumptions
) || ((loop_vinfo)->simd_if_cond))
1890 && !LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo)(loop_vinfo)->peeling_for_niter
1891 && !LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)(loop_vinfo)->peeling_for_alignment
1892 && !vect_apply_runtime_profitability_check_p (loop_vinfo))
1893 {
1894 if (dump_enabled_p ())
1895 dump_printf_loc (MSG_NOTE, vect_location, "no need for a runtime"
1896 " choice between the scalar and vector loops\n");
1897 min_profitable_estimate = min_profitable_iters;
1898 }
1899
1900 /* If the vector loop needs multiple iterations to be beneficial then
1901 things are probably too close to call, and the conservative thing
1902 would be to stick with the scalar code. */
1903 if (loop_cost_model (loop) == VECT_COST_MODEL_VERY_CHEAP
1904 && min_profitable_estimate > (int) vect_vf_for_cost (loop_vinfo))
1905 {
1906 if (dump_enabled_p ())
1907 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1908 "one iteration of the vector loop would be"
1909 " more expensive than the equivalent number of"
1910 " iterations of the scalar loop\n");
1911 return 0;
1912 }
1913
1914 HOST_WIDE_INTlong estimated_niter;
1915
1916 /* If we are vectorizing an epilogue then we know the maximum number of
1917 scalar iterations it will cover is at least one lower than the
1918 vectorization factor of the main loop. */
1919 if (LOOP_VINFO_EPILOGUE_P (loop_vinfo)((loop_vinfo)->orig_loop_info != nullptr))
1920 estimated_niter
1921 = vect_vf_for_cost (LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo)(loop_vinfo)->orig_loop_info) - 1;
1922 else
1923 {
1924 estimated_niter = estimated_stmt_executions_int (loop);
1925 if (estimated_niter == -1)
1926 estimated_niter = likely_max_stmt_executions_int (loop);
1927 }
1928 if (estimated_niter != -1
1929 && ((unsigned HOST_WIDE_INTlong) estimated_niter
1930 < MAX (th, (unsigned) min_profitable_estimate)((th) > ((unsigned) min_profitable_estimate) ? (th) : ((unsigned
) min_profitable_estimate))
))
1931 {
1932 if (dump_enabled_p ())
1933 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1934 "not vectorized: estimated iteration count too "
1935 "small.\n");
1936 if (dump_enabled_p ())
1937 dump_printf_loc (MSG_NOTE, vect_location,
1938 "not vectorized: estimated iteration count smaller "
1939 "than specified loop bound parameter or minimum "
1940 "profitable iterations (whichever is more "
1941 "conservative).\n");
1942 return -1;
1943 }
1944
1945 return 1;
1946}
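The threshold logic above reduces to th = MAX (param_min_vect_loop_bound * assumed_vf, min_profitable_iters). With hypothetical numbers, assumed_vf = 4, param_min_vect_loop_bound = 2 and min_profitable_iters = 7 give th = MAX (2 * 4, 7) = 8, so a loop whose known iteration count is below 8 is rejected as not profitable to vectorize.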
1947
1948static opt_result
1949vect_get_datarefs_in_loop (loop_p loop, basic_block *bbs,
1950 vec<data_reference_p> *datarefs,
1951 unsigned int *n_stmts)
1952{
1953 *n_stmts = 0;
1954 for (unsigned i = 0; i < loop->num_nodes; i++)
1955 for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]);
1956 !gsi_end_p (gsi); gsi_next (&gsi))
1957 {
1958 gimple *stmt = gsi_stmt (gsi);
1959 if (is_gimple_debug (stmt))
1960 continue;
1961 ++(*n_stmts);
1962 opt_result res = vect_find_stmt_data_reference (loop, stmt, datarefs,
1963 NULLnullptr, 0);
1964 if (!res)
1965 {
1966 if (is_gimple_call (stmt) && loop->safelen)
1967 {
1968 tree fndecl = gimple_call_fndecl (stmt), op;
1969 if (fndecl != NULL_TREE(tree) nullptr)
1970 {
1971 cgraph_node *node = cgraph_node::get (fndecl);
1972 if (node != NULLnullptr && node->simd_clones != NULLnullptr)
1973 {
1974 unsigned int j, n = gimple_call_num_args (stmt);
1975 for (j = 0; j < n; j++)
1976 {
1977 op = gimple_call_arg (stmt, j);
1978 if (DECL_P (op)(tree_code_type[(int) (((enum tree_code) (op)->base.code))
] == tcc_declaration)
1979 || (REFERENCE_CLASS_P (op)(tree_code_type[(int) (((enum tree_code) (op)->base.code))
] == tcc_reference)
1980 && get_base_address (op)))
1981 break;
1982 }
1983 op = gimple_call_lhs (stmt);
1984 /* Ignore #pragma omp declare simd functions
1985 if they don't have data references in the
1986 call stmt itself. */
1987 if (j == n
1988 && !(op
1989 && (DECL_P (op)(tree_code_type[(int) (((enum tree_code) (op)->base.code))
] == tcc_declaration)
1990 || (REFERENCE_CLASS_P (op)(tree_code_type[(int) (((enum tree_code) (op)->base.code))
] == tcc_reference)
1991 && get_base_address (op)))))
1992 continue;
1993 }
1994 }
1995 }
1996 return res;
1997 }
1998 /* If dependence analysis will give up due to the limit on the
1999 number of datarefs, stop here and fail fatally. */
2000 if (datarefs->length ()
2001 > (unsigned)param_loop_max_datarefs_for_datadepsglobal_options.x_param_loop_max_datarefs_for_datadeps)
2002 return opt_result::failure_at (stmt, "exceeded param "
2003 "loop-max-datarefs-for-datadeps\n");
2004 }
2005 return opt_result::success ();
2006}
2007
2008/* Look for SLP-only access groups and turn each individual access into its own
2009 group. */
2010static void
2011vect_dissolve_slp_only_groups (loop_vec_info loop_vinfo)
2012{
2013 unsigned int i;
2014 struct data_reference *dr;
2015
2016 DUMP_VECT_SCOPE ("vect_dissolve_slp_only_groups")auto_dump_scope scope ("vect_dissolve_slp_only_groups", vect_location
)
;
2017
2018 vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo)(loop_vinfo)->shared->datarefs;
2019 FOR_EACH_VEC_ELT (datarefs, i, dr)for (i = 0; (datarefs).iterate ((i), &(dr)); ++(i))
2020 {
2021 gcc_assert (DR_REF (dr))((void)(!((dr)->ref) ? fancy_abort ("/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c"
, 2021, __FUNCTION__), 0 : 0))
;
2022 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (DR_STMT (dr)(dr)->stmt);
2023
2024 /* Check if the load is a part of an interleaving chain. */
2025 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)((stmt_info)->dr_aux.dr && (((void)(!((stmt_info)->
dr_aux.dr) ? fancy_abort ("/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c"
, 2025, __FUNCTION__), 0 : 0)), (stmt_info)->first_element
))
)
2026 {
2027 stmt_vec_info first_element = DR_GROUP_FIRST_ELEMENT (stmt_info)(((void)(!((stmt_info)->dr_aux.dr) ? fancy_abort ("/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c"
, 2027, __FUNCTION__), 0 : 0)), (stmt_info)->first_element
)
;
2028 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (first_element)(((void)(!((first_element)->dr_aux.stmt == (first_element)
) ? fancy_abort ("/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c"
, 2028, __FUNCTION__), 0 : 0)), &(first_element)->dr_aux
)
;
2029 unsigned int group_size = DR_GROUP_SIZE (first_element)(((void)(!((first_element)->dr_aux.dr) ? fancy_abort ("/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c"
, 2029, __FUNCTION__), 0 : 0)), (first_element)->size)
;
2030
2031 /* Check if SLP-only groups. */
2032 if (!STMT_SLP_TYPE (stmt_info)(stmt_info)->slp_type
2033 && STMT_VINFO_SLP_VECT_ONLY (first_element)(first_element)->slp_vect_only_p)
2034 {
2035 /* Dissolve the group. */
2036 STMT_VINFO_SLP_VECT_ONLY (first_element)(first_element)->slp_vect_only_p = false;
2037
2038 stmt_vec_info vinfo = first_element;
2039 while (vinfo)
2040 {
2041 stmt_vec_info next = DR_GROUP_NEXT_ELEMENT (vinfo)(((void)(!((vinfo)->dr_aux.dr) ? fancy_abort ("/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c"
, 2041, __FUNCTION__), 0 : 0)), (vinfo)->next_element)
;
2042 DR_GROUP_FIRST_ELEMENT (vinfo)(((void)(!((vinfo)->dr_aux.dr) ? fancy_abort ("/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c"
, 2042, __FUNCTION__), 0 : 0)), (vinfo)->first_element)
= vinfo;
2043 DR_GROUP_NEXT_ELEMENT (vinfo)(((void)(!((vinfo)->dr_aux.dr) ? fancy_abort ("/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c"
, 2043, __FUNCTION__), 0 : 0)), (vinfo)->next_element)
= NULLnullptr;
2044 DR_GROUP_SIZE (vinfo)(((void)(!((vinfo)->dr_aux.dr) ? fancy_abort ("/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c"
, 2044, __FUNCTION__), 0 : 0)), (vinfo)->size)
= 1;
2045 if (STMT_VINFO_STRIDED_P (first_element)(first_element)->strided_p)
2046 DR_GROUP_GAP (vinfo)(((void)(!((vinfo)->dr_aux.dr) ? fancy_abort ("/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c"
, 2046, __FUNCTION__), 0 : 0)), (vinfo)->gap)
= 0;
2047 else
2048 DR_GROUP_GAP (vinfo)(((void)(!((vinfo)->dr_aux.dr) ? fancy_abort ("/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c"
, 2048, __FUNCTION__), 0 : 0)), (vinfo)->gap)
= group_size - 1;
2049 /* Duplicate and adjust alignment info; it needs to
2050 be present on each group leader, see dr_misalignment. */
2051 if (vinfo != first_element)
2052 {
2053 dr_vec_info *dr_info2 = STMT_VINFO_DR_INFO (vinfo)(((void)(!((vinfo)->dr_aux.stmt == (vinfo)) ? fancy_abort (
"/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c"
, 2053, __FUNCTION__), 0 : 0)), &(vinfo)->dr_aux)
;
2054 dr_info2->target_alignment = dr_info->target_alignment;
2055 int misalignment = dr_info->misalignment;
2056 if (misalignment != DR_MISALIGNMENT_UNKNOWN(-1))
2057 {
2058 HOST_WIDE_INTlong diff
2059 = (TREE_INT_CST_LOW (DR_INIT (dr_info2->dr))((unsigned long) (*tree_int_cst_elt_check (((dr_info2->dr)
->innermost.init), (0), "/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c"
, 2059, __FUNCTION__)))
2060 - TREE_INT_CST_LOW (DR_INIT (dr_info->dr))((unsigned long) (*tree_int_cst_elt_check (((dr_info->dr)->
innermost.init), (0), "/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c"
, 2060, __FUNCTION__)))
);
2061 unsigned HOST_WIDE_INTlong align_c
2062 = dr_info->target_alignment.to_constant ();
2063 misalignment = (misalignment + diff) % align_c;
2064 }
2065 dr_info2->misalignment = misalignment;
2066 }
2067 vinfo = next;
2068 }
2069 }
2070 }
2071 }
2072}
2073
2074/* Determine if operating on full vectors for LOOP_VINFO might leave
2075 some scalar iterations still to do. If so, decide how we should
2076 handle those scalar iterations. The possibilities are:
2077
2078 (1) Make LOOP_VINFO operate on partial vectors instead of full vectors.
2079 In this case:
2080
2081 LOOP_VINFO_USING_PARTIAL_VECTORS_P == true
2082 LOOP_VINFO_EPIL_USING_PARTIAL_VECTORS_P == false
2083 LOOP_VINFO_PEELING_FOR_NITER == false
2084
2085 (2) Make LOOP_VINFO operate on full vectors and use an epilogue loop
2086 to handle the remaining scalar iterations. In this case:
2087
2088 LOOP_VINFO_USING_PARTIAL_VECTORS_P == false
2089 LOOP_VINFO_PEELING_FOR_NITER == true
2090
2091 There are two choices:
2092
2093 (2a) Consider vectorizing the epilogue loop at the same VF as the
2094 main loop, but using partial vectors instead of full vectors.
2095 In this case:
2096
2097 LOOP_VINFO_EPIL_USING_PARTIAL_VECTORS_P == true
2098
2099 (2b) Consider vectorizing the epilogue loop at lower VFs only.
2100 In this case:
2101
2102 LOOP_VINFO_EPIL_USING_PARTIAL_VECTORS_P == false
2103
2104 When FOR_EPILOGUE_P is true, make this determination based on the
2105 assumption that LOOP_VINFO is an epilogue loop, otherwise make it
2106 based on the assumption that LOOP_VINFO is the main loop. The caller
2107 has made sure that the number of iterations is set appropriately for
2108 this value of FOR_EPILOGUE_P. */
2109
2110opt_result
2111vect_determine_partial_vectors_and_peeling (loop_vec_info loop_vinfo,
2112 bool for_epilogue_p)
2113{
2114 /* Determine whether there would be any scalar iterations left over. */
2115 bool need_peeling_or_partial_vectors_p
2116 = vect_need_peeling_or_partial_vectors_p (loop_vinfo);
2117
2118 /* Decide whether to vectorize the loop with partial vectors. */
2119 LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)(loop_vinfo)->using_partial_vectors_p = false;
2120 LOOP_VINFO_EPIL_USING_PARTIAL_VECTORS_P (loop_vinfo)(loop_vinfo)->epil_using_partial_vectors_p = false;
2121 if (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)(loop_vinfo)->can_use_partial_vectors_p
2122 && need_peeling_or_partial_vectors_p)
2123 {
2124 /* For partial-vector-usage=1, try to push the handling of partial
2125 vectors to the epilogue, with the main loop continuing to operate
2126 on full vectors.
2127
2128 ??? We could then end up failing to use partial vectors if we
2129 decide to peel iterations into a prologue, and if the main loop
2130 then ends up processing fewer than VF iterations. */
2131 if (param_vect_partial_vector_usageglobal_options.x_param_vect_partial_vector_usage == 1
2132 && !LOOP_VINFO_EPILOGUE_P (loop_vinfo)((loop_vinfo)->orig_loop_info != nullptr)
2133 && !vect_known_niters_smaller_than_vf (loop_vinfo))
2134 LOOP_VINFO_EPIL_USING_PARTIAL_VECTORS_P (loop_vinfo)(loop_vinfo)->epil_using_partial_vectors_p = true;
2135 else
2136 LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)(loop_vinfo)->using_partial_vectors_p = true;
2137 }
2138
2139 if (dump_enabled_p ())
2140 {
2141 if (LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)(loop_vinfo)->using_partial_vectors_p)
2142 dump_printf_loc (MSG_NOTE, vect_location,
2143 "operating on partial vectors%s.\n",
2144 for_epilogue_p ? " for epilogue loop" : "");
2145 else
2146 dump_printf_loc (MSG_NOTE, vect_location,
2147 "operating only on full vectors%s.\n",
2148 for_epilogue_p ? " for epilogue loop" : "");
2149 }
2150
2151 if (for_epilogue_p)
2152 {
2153 loop_vec_info orig_loop_vinfo = LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo)(loop_vinfo)->orig_loop_info;
2154 gcc_assert (orig_loop_vinfo)((void)(!(orig_loop_vinfo) ? fancy_abort ("/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c"
, 2154, __FUNCTION__), 0 : 0))
;
2155 if (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)(loop_vinfo)->using_partial_vectors_p)
2156 gcc_assert (known_lt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),((void)(!((!maybe_le ((orig_loop_vinfo)->vectorization_factor
, (loop_vinfo)->vectorization_factor))) ? fancy_abort ("/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c"
, 2157, __FUNCTION__), 0 : 0))
2157 LOOP_VINFO_VECT_FACTOR (orig_loop_vinfo)))((void)(!((!maybe_le ((orig_loop_vinfo)->vectorization_factor
, (loop_vinfo)->vectorization_factor))) ? fancy_abort ("/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c"
, 2157, __FUNCTION__), 0 : 0))
;
2158 }
2159
2160 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)(tree_fits_shwi_p ((loop_vinfo)->num_iters) && tree_to_shwi
((loop_vinfo)->num_iters) > 0)
2161 && !LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)(loop_vinfo)->using_partial_vectors_p)
2162 {
2163 /* Check that the loop processes at least one full vector. */
2164 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo)(loop_vinfo)->vectorization_factor;
2165 tree scalar_niters = LOOP_VINFO_NITERS (loop_vinfo)(loop_vinfo)->num_iters;
2166 if (known_lt (wi::to_widest (scalar_niters), vf)(!maybe_le (vf, wi::to_widest (scalar_niters))))
2167 return opt_result::failure_at (vect_location,
2168 "loop does not have enough iterations"
2169 " to support vectorization.\n");
2170
2171 /* If we need to peel an extra epilogue iteration to handle data
2172 accesses with gaps, check that there are enough scalar iterations
2173 available.
2174
2175 The check above is redundant with this one when peeling for gaps,
2176 but the distinction is useful for diagnostics. */
2177 tree scalar_nitersm1 = LOOP_VINFO_NITERSM1 (loop_vinfo)(loop_vinfo)->num_itersm1;
2178 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)(loop_vinfo)->peeling_for_gaps
2179 && known_lt (wi::to_widest (scalar_nitersm1), vf)(!maybe_le (vf, wi::to_widest (scalar_nitersm1))))
2180 return opt_result::failure_at (vect_location,
2181 "loop does not have enough iterations"
2182 " to support peeling for gaps.\n");
2183 }
2184
2185 LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo)(loop_vinfo)->peeling_for_niter
2186 = (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)(loop_vinfo)->using_partial_vectors_p
2187 && need_peeling_or_partial_vectors_p);
2188
2189 return opt_result::success ();
2190}
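Reduced to its decision structure, the function above selects among the outcomes (1), (2a) and (2b) described in the leading comment. A simplified restatement with plain bools; the niters sanity checks, assertions and dump output are deliberately omitted, and the parameter names are illustrative rather than the real interface:

/* Sketch of the decision only, not the actual function signature.  */
static void
sketch_decide_partial_vectors (bool can_use_partial_vectors_p,
                               bool need_peeling_or_partial_vectors_p,
                               bool is_epilogue_p,
                               bool niters_known_smaller_than_vf_p,
                               int param_partial_vector_usage,
                               bool *use_partial_p,
                               bool *epil_use_partial_p,
                               bool *peel_for_niter_p)
{
  *use_partial_p = false;
  *epil_use_partial_p = false;
  if (can_use_partial_vectors_p && need_peeling_or_partial_vectors_p)
    {
      if (param_partial_vector_usage == 1
          && !is_epilogue_p
          && !niters_known_smaller_than_vf_p)
        *epil_use_partial_p = true;  /* case (2a): main loop keeps full vectors.  */
      else
        *use_partial_p = true;       /* case (1).  */
    }
  /* Cases (2)/(2b): peel an epilogue when full vectors leave iterations over.  */
  *peel_for_niter_p = !*use_partial_p && need_peeling_or_partial_vectors_p;
}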
2191
2192/* Function vect_analyze_loop_2.
2193
2194 Apply a set of analyses on LOOP, and create a loop_vec_info struct
2195 for it. The different analyses will record information in the
2196 loop_vec_info struct. */
2197static opt_result
2198vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool &fatal)
2199{
2200 opt_result ok = opt_result::success ();
2201 int res;
2202 unsigned int max_vf = MAX_VECTORIZATION_FACTOR2147483647;
2203 poly_uint64 min_vf = 2;
2204 loop_vec_info orig_loop_vinfo = NULLnullptr;
2205
2206 /* If we are dealing with an epilogue then orig_loop_vinfo points to the
2207 loop_vec_info of the first vectorized loop. */
2208 if (LOOP_VINFO_EPILOGUE_P (loop_vinfo)((loop_vinfo)->orig_loop_info != nullptr))
2209 orig_loop_vinfo = LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo)(loop_vinfo)->orig_loop_info;
2210 else
2211 orig_loop_vinfo = loop_vinfo;
2212 gcc_assert (orig_loop_vinfo)((void)(!(orig_loop_vinfo) ? fancy_abort ("/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c"
, 2212, __FUNCTION__), 0 : 0))
;
2213
2214 /* The first group of checks is independent of the vector size. */
2215 fatal = true;
2216
2217 if (LOOP_VINFO_SIMD_IF_COND (loop_vinfo)(loop_vinfo)->simd_if_cond
2218 && integer_zerop (LOOP_VINFO_SIMD_IF_COND (loop_vinfo)(loop_vinfo)->simd_if_cond))
2219 return opt_result::failure_at (vect_location,
2220 "not vectorized: simd if(0)\n");
2221
2222 /* Find all data references in the loop (which correspond to vdefs/vuses)
2223 and analyze their evolution in the loop. */
2224
2225 loop_p loop = LOOP_VINFO_LOOP (loop_vinfo)(loop_vinfo)->loop;
2226
2227 /* Gather the data references and count stmts in the loop. */
2228 if (!LOOP_VINFO_DATAREFS (loop_vinfo)(loop_vinfo)->shared->datarefs.exists ())
2229 {
2230 opt_result res
2231 = vect_get_datarefs_in_loop (loop, LOOP_VINFO_BBS (loop_vinfo)(loop_vinfo)->bbs,
2232 &LOOP_VINFO_DATAREFS (loop_vinfo)(loop_vinfo)->shared->datarefs,
2233 &LOOP_VINFO_N_STMTS (loop_vinfo)(loop_vinfo)->shared->n_stmts);
2234 if (!res)
2235 {
2236 if (dump_enabled_p ())
2237 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2238 "not vectorized: loop contains function "
2239 "calls or data references that cannot "
2240 "be analyzed\n");
2241 return res;
2242 }
2243 loop_vinfo->shared->save_datarefs ();
2244 }
2245 else
2246 loop_vinfo->shared->check_datarefs ();
2247
2248 /* Analyze the data references and also adjust the minimal
2249 vectorization factor according to the loads and stores. */
2250
2251 ok = vect_analyze_data_refs (loop_vinfo, &min_vf, &fatal);
2252 if (!ok)
2253 {
2254 if (dump_enabled_p ())
2255 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2256 "bad data references.\n");
2257 return ok;
2258 }
2259
2260 /* Classify all cross-iteration scalar data-flow cycles.
2261 Cross-iteration cycles caused by virtual phis are analyzed separately. */
2262 vect_analyze_scalar_cycles (loop_vinfo);
2263
2264 vect_pattern_recog (loop_vinfo);
2265
2266 vect_fixup_scalar_cycles_with_patterns (loop_vinfo);
2267
2268 /* Analyze the access patterns of the data-refs in the loop (consecutive,
2269 complex, etc.). FORNOW: Only handle consecutive access pattern. */
2270
2271 ok = vect_analyze_data_ref_accesses (loop_vinfo, NULLnullptr);
2272 if (!ok)
2273 {
2274 if (dump_enabled_p ())
2275 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2276 "bad data access.\n");
2277 return ok;
2278 }
2279
2280 /* Data-flow analysis to detect stmts that do not need to be vectorized. */
2281
2282 ok = vect_mark_stmts_to_be_vectorized (loop_vinfo, &fatal);
2283 if (!ok)
2284 {
2285 if (dump_enabled_p ())
2286 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2287 "unexpected pattern.\n");
2288 return ok;
2289 }
2290
2291 /* While the rest of the analysis below depends on it in some way. */
2292 fatal = false;
2293
2294 /* Analyze data dependences between the data-refs in the loop
2295 and adjust the maximum vectorization factor according to
2296 the dependences.
2297 FORNOW: fail at the first data dependence that we encounter. */
2298
2299 ok = vect_analyze_data_ref_dependences (loop_vinfo, &max_vf);
2300 if (!ok)
2301 {
2302 if (dump_enabled_p ())
2303 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2304 "bad data dependence.\n");
2305 return ok;
2306 }
2307 if (max_vf != MAX_VECTORIZATION_FACTOR2147483647
2308 && maybe_lt (max_vf, min_vf))
2309 return opt_result::failure_at (vect_location, "bad data dependence.\n");
2310 LOOP_VINFO_MAX_VECT_FACTOR (loop_vinfo)(loop_vinfo)->max_vectorization_factor = max_vf;
2311
2312 ok = vect_determine_vectorization_factor (loop_vinfo);
2313 if (!ok)
2314 {
2315 if (dump_enabled_p ())
2316 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2317 "can't determine vectorization factor.\n");
2318 return ok;
2319 }
2320 if (max_vf != MAX_VECTORIZATION_FACTOR2147483647
2321 && maybe_lt (max_vf, LOOP_VINFO_VECT_FACTOR (loop_vinfo)(loop_vinfo)->vectorization_factor))
2322 return opt_result::failure_at (vect_location, "bad data dependence.\n");
2323
2324 /* Compute the scalar iteration cost. */
2325 vect_compute_single_scalar_iteration_cost (loop_vinfo);
2326
2327 poly_uint64 saved_vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo)(loop_vinfo)->vectorization_factor;
2328
2329 /* Check the SLP opportunities in the loop, analyze and build SLP trees. */
2330 ok = vect_analyze_slp (loop_vinfo, LOOP_VINFO_N_STMTS (loop_vinfo)(loop_vinfo)->shared->n_stmts);
2331 if (!ok)
2332 return ok;
2333
2334 /* If there are any SLP instances mark them as pure_slp. */
2335 bool slp = vect_make_slp_decision (loop_vinfo);
2336 if (slp)
2337 {
2338 /* Find stmts that need to be both vectorized and SLPed. */
2339 vect_detect_hybrid_slp (loop_vinfo);
2340
2341 /* Update the vectorization factor based on the SLP decision. */
2342 vect_update_vf_for_slp (loop_vinfo);
2343
2344 /* Optimize the SLP graph with the vectorization factor fixed. */
2345 vect_optimize_slp (loop_vinfo);
2346
2347 /* Gather the loads reachable from the SLP graph entries. */
2348 vect_gather_slp_loads (loop_vinfo);
2349 }
2350
2351 bool saved_can_use_partial_vectors_p
2352 = LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)(loop_vinfo)->can_use_partial_vectors_p;
2353
2354 /* We don't expect to have to roll back to anything other than an empty
2355 set of rgroups. */
2356 gcc_assert (LOOP_VINFO_MASKS (loop_vinfo).is_empty ())((void)(!((loop_vinfo)->masks.is_empty ()) ? fancy_abort (
"/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c"
, 2356, __FUNCTION__), 0 : 0))
;
2357
2358 /* This is the point where we can re-start analysis with SLP forced off. */
2359start_over:
2360
2361 /* Now the vectorization factor is final. */
2362 poly_uint64 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo)(loop_vinfo)->vectorization_factor;
2363 gcc_assert (known_ne (vectorization_factor, 0U))((void)(!((!maybe_eq (vectorization_factor, 0U))) ? fancy_abort
("/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c"
, 2363, __FUNCTION__), 0 : 0))
;
2364
2365 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)(tree_fits_shwi_p ((loop_vinfo)->num_iters) && tree_to_shwi
((loop_vinfo)->num_iters) > 0)
&& dump_enabled_p ())
2366 {
2367 dump_printf_loc (MSG_NOTE, vect_location,
2368 "vectorization_factor = ");
2369 dump_dec (MSG_NOTE, vectorization_factor);
2370 dump_printf (MSG_NOTE, ", niters = %wd\n",
2371 LOOP_VINFO_INT_NITERS (loop_vinfo)(((unsigned long) (*tree_int_cst_elt_check (((loop_vinfo)->
num_iters), (0), "/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c"
, 2371, __FUNCTION__))))
);
2372 }
2373
2374 loop_vinfo->vector_costs = init_cost (loop_vinfo, false);
2375
2376 /* Analyze the alignment of the data-refs in the loop.
2377 Fail if a data reference is found that cannot be vectorized. */
2378
2379 ok = vect_analyze_data_refs_alignment (loop_vinfo);
2380 if (!ok)
2381 {
2382 if (dump_enabled_p ())
2383 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2384 "bad data alignment.\n");
2385 return ok;
2386 }
2387
2388 /* Prune the list of ddrs to be tested at run-time by versioning for alias.
2389 It is important to call pruning after vect_analyze_data_ref_accesses,
2390 since we use grouping information gathered by interleaving analysis. */
2391 ok = vect_prune_runtime_alias_test_list (loop_vinfo);
2392 if (!ok)
2393 return ok;
2394
2395 /* Do not invoke vect_enhance_data_refs_alignment for epilogue
2396 vectorization, since we do not want to add extra peeling or
2397 add versioning for alignment. */
2398 if (!LOOP_VINFO_EPILOGUE_P (loop_vinfo))
2399 /* This pass will decide on using loop versioning and/or loop peeling in
2400 order to enhance the alignment of data references in the loop. */
2401 ok = vect_enhance_data_refs_alignment (loop_vinfo);
2402 if (!ok)
2403 return ok;
2404
2405 if (slp)
2406 {
2407 /* Analyze operations in the SLP instances. Note this may
2408 remove unsupported SLP instances which makes the above
2409 SLP kind detection invalid. */
2410 unsigned old_size = LOOP_VINFO_SLP_INSTANCES (loop_vinfo).length ();
2411 vect_slp_analyze_operations (loop_vinfo);
2412 if (LOOP_VINFO_SLP_INSTANCES (loop_vinfo).length () != old_size)
2413 {
2414 ok = opt_result::failure_at (vect_location,
2415 "unsupported SLP instances\n");
2416 goto again;
2417 }
2418
2419 /* Check whether any load in ALL SLP instances is possibly permuted. */
2420 slp_tree load_node, slp_root;
2421 unsigned i, x;
2422 slp_instance instance;
2423 bool can_use_lanes = true;
2424 FOR_EACH_VEC_ELT (LOOP_VINFO_SLP_INSTANCES (loop_vinfo), x, instance)
2425 {
2426 slp_root = SLP_INSTANCE_TREE (instance);
2427 int group_size = SLP_TREE_LANES (slp_root);
2428 tree vectype = SLP_TREE_VECTYPE (slp_root);
2429 bool loads_permuted = false;
2430 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), i, load_node)
2431 {
2432 if (!SLP_TREE_LOAD_PERMUTATION (load_node).exists ())
2433 continue;
2434 unsigned j;
2435 stmt_vec_info load_info;
2436 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load_info)
2437 if (SLP_TREE_LOAD_PERMUTATION (load_node)[j] != j)
2438 {
2439 loads_permuted = true;
2440 break;
2441 }
2442 }
2443
2444 /* If the loads and stores can be handled with load/store-lane
2445 instructions record it and move on to the next instance. */
2446 if (loads_permuted
2447 && SLP_INSTANCE_KIND (instance) == slp_inst_kind_store
2448 && vect_store_lanes_supported (vectype, group_size, false))
2449 {
2450 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), i, load_node)
2451 {
2452 stmt_vec_info stmt_vinfo = DR_GROUP_FIRST_ELEMENT
2453 (SLP_TREE_SCALAR_STMTS (load_node)[0]);
2454 /* Use SLP for strided accesses (or if we can't
2455 load-lanes). */
2456 if (STMT_VINFO_STRIDED_P (stmt_vinfo)
2457 || ! vect_load_lanes_supported
2458 (STMT_VINFO_VECTYPE (stmt_vinfo),
2459 DR_GROUP_SIZE (stmt_vinfo), false))
2460 break;
2461 }
2462
2463 can_use_lanes
2464 = can_use_lanes && i == SLP_INSTANCE_LOADS (instance).length ();
2465
2466 if (can_use_lanes && dump_enabled_p ())
2467 dump_printf_loc (MSG_NOTE, vect_location,
2468 "SLP instance %p can use load/store-lanes\n",
2469 instance);
2470 }
2471 else
2472 {
2473 can_use_lanes = false;
2474 break;
2475 }
2476 }
2477
2478 /* If all SLP instances can use load/store-lanes abort SLP and try again
2479 with SLP disabled. */
2480 if (can_use_lanes)
2481 {
2482 ok = opt_result::failure_at (vect_location,
2483 "Built SLP cancelled: can use "
2484 "load/store-lanes\n");
2485 if (dump_enabled_p ())
2486 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2487 "Built SLP cancelled: all SLP instances support "
2488 "load/store-lanes\n");
2489 goto again;
2490 }
2491 }
2492
2493 /* Dissolve SLP-only groups. */
2494 vect_dissolve_slp_only_groups (loop_vinfo);
2495
2496 /* Scan all the remaining operations in the loop that are not subject
2497 to SLP and make sure they are vectorizable. */
2498 ok = vect_analyze_loop_operations (loop_vinfo);
2499 if (!ok)
2500 {
2501 if (dump_enabled_p ())
2502 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2503 "bad operation or unsupported loop bound.\n");
2504 return ok;
2505 }
2506
2507 /* For now, we don't expect to mix both masking and length approaches for one
2508 loop, disable it if both are recorded. */
2509 if (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
2510 && !LOOP_VINFO_MASKS (loop_vinfo).is_empty ()
2511 && !LOOP_VINFO_LENS (loop_vinfo).is_empty ())
2512 {
2513 if (dump_enabled_p ())
2514 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2515 "can't vectorize a loop with partial vectors"
2516 " because we don't expect to mix different"
2517 " approaches with partial vectors for the"
2518 " same loop.\n");
2519 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
2520 }
2521
2522 /* If we still have the option of using partial vectors,
2523 check whether we can generate the necessary loop controls. */
2524 if (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
2525 && !vect_verify_full_masking (loop_vinfo)
2526 && !vect_verify_loop_lens (loop_vinfo))
2527 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
2528
2529 /* If we're vectorizing an epilogue loop, the vectorized loop either needs
2530 to be able to handle fewer than VF scalars, or needs to have a lower VF
2531 than the main loop. */
2532 if (LOOP_VINFO_EPILOGUE_P (loop_vinfo)
2533 && !LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
2534 && maybe_ge (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
2535 LOOP_VINFO_VECT_FACTOR (orig_loop_vinfo)))
2536 return opt_result::failure_at (vect_location,
2537 "Vectorization factor too high for"
2538 " epilogue loop.\n");
2539
2540 /* Decide whether this loop_vinfo should use partial vectors or peeling,
2541 assuming that the loop will be used as a main loop. We will redo
2542 this analysis later if we instead decide to use the loop as an
2543 epilogue loop. */
2544 ok = vect_determine_partial_vectors_and_peeling (loop_vinfo, false);
2545 if (!ok)
2546 return ok;
2547
2548 /* Check the costings of the loop make vectorizing worthwhile. */
2549 res = vect_analyze_loop_costing (loop_vinfo);
2550 if (res < 0)
2551 {
2552 ok = opt_result::failure_at (vect_location,
2553 "Loop costings may not be worthwhile.\n");
2554 goto again;
2555 }
2556 if (!res)
2557 return opt_result::failure_at (vect_location,
2558 "Loop costings not worthwhile.\n");
2559
2560 /* If an epilogue loop is required make sure we can create one. */
2561 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
2562 || LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo))
2563 {
2564 if (dump_enabled_p ())
2565 dump_printf_loc (MSG_NOTE, vect_location, "epilog loop required\n");
2566 if (!vect_can_advance_ivs_p (loop_vinfo)
2567 || !slpeel_can_duplicate_loop_p (LOOP_VINFO_LOOP (loop_vinfo),
2568 single_exit (LOOP_VINFO_LOOP
2569 (loop_vinfo))))
2570 {
2571 ok = opt_result::failure_at (vect_location,
2572 "not vectorized: can't create required "
2573 "epilog loop\n");
2574 goto again;
2575 }
2576 }
2577
2578 /* During peeling, we need to check if number of loop iterations is
2579 enough for both peeled prolog loop and vector loop. This check
2580 can be merged along with threshold check of loop versioning, so
2581 increase threshold for this case if necessary.
2582
2583 If we are analyzing an epilogue we still want to check what its
2584 versioning threshold would be. If we decide to vectorize the epilogues we
2585 will want to use the lowest versioning threshold of all epilogues and main
2586 loop. This will enable us to enter a vectorized epilogue even when
2587 versioning the loop. We can't simply check whether the epilogue requires
2588 versioning though since we may have skipped some versioning checks when
2589 analyzing the epilogue. For instance, checks for alias versioning will be
2590 skipped when dealing with epilogues as we assume we already checked them
2591 for the main loop. So instead we always check the 'orig_loop_vinfo'. */
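 /* In short: niters_th below accumulates the peeled prologue iterations,
 one full vector iteration (unless partial vectors are in use) and one
 extra iteration for gap peeling, and is then combined with the
 cost-model threshold TH computed just below. */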
2592 if (LOOP_REQUIRES_VERSIONING (orig_loop_vinfo))
2593 {
2594 poly_uint64 niters_th = 0;
2595 unsigned int th = LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo);
2596
2597 if (!vect_use_loop_mask_for_alignment_p (loop_vinfo))
2598 {
2599 /* Niters for peeled prolog loop. */
2600 if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) < 0)
2601 {
2602 dr_vec_info *dr_info = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
2603 tree vectype = STMT_VINFO_VECTYPE (dr_info->stmt);
2604 niters_th += TYPE_VECTOR_SUBPARTS (vectype) - 1;
2605 }
2606 else
2607 niters_th += LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
2608 }
2609
2610 /* Niters for at least one iteration of vectorized loop. */
2611 if (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo))
2612 niters_th += LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2613 /* One additional iteration because of peeling for gap. */
2614 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
2615 niters_th += 1;
2616
2617 /* Use the same condition as vect_transform_loop to decide when to use
2618 the cost to determine a versioning threshold. */
2619 if (vect_apply_runtime_profitability_check_p (loop_vinfo)
2620 && ordered_p (th, niters_th))
2621 niters_th = ordered_max (poly_uint64 (th), niters_th);
2622
2623 LOOP_VINFO_VERSIONING_THRESHOLD (loop_vinfo) = niters_th;
2624 }
2625
2626 gcc_assert (known_eq (vectorization_factor,
2627 LOOP_VINFO_VECT_FACTOR (loop_vinfo)));
2628
2629 /* Ok to vectorize! */
2630 LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
2631 return opt_result::success ();
2632
2633again:
2634 /* Ensure that "ok" is false (with an opt_problem if dumping is enabled). */
2635 gcc_assert (!ok);
2636
2637 /* Try again with SLP forced off but if we didn't do any SLP there is
2638 no point in re-trying. */
2639 if (!slp)
2640 return ok;
2641
2642 /* If there are reduction chains re-trying will fail anyway. */
2643 if (! LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo).is_empty ())
2644 return ok;
2645
2646 /* Likewise if the grouped loads or stores in the SLP cannot be handled
2647 via interleaving or lane instructions. */
2648 slp_instance instance;
2649 slp_tree node;
2650 unsigned i, j;
2651 FOR_EACH_VEC_ELT (LOOP_VINFO_SLP_INSTANCES (loop_vinfo), i, instance)
2652 {
2653 stmt_vec_info vinfo;
2654 vinfo = SLP_TREE_SCALAR_STMTS (SLP_INSTANCE_TREE (instance))[0];
2655 if (! STMT_VINFO_GROUPED_ACCESS (vinfo))
2656 continue;
2657 vinfo = DR_GROUP_FIRST_ELEMENT (vinfo);
2658 unsigned int size = DR_GROUP_SIZE (vinfo);
2659 tree vectype = STMT_VINFO_VECTYPE (vinfo);
2660 if (! vect_store_lanes_supported (vectype, size, false)
2661 && ! known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U)
2662 && ! vect_grouped_store_supported (vectype, size))
2663 return opt_result::failure_at (vinfo->stmt,
2664 "unsupported grouped store\n");
2665 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), j, node)
2666 {
2667 vinfo = SLP_TREE_SCALAR_STMTS (node)[0];
2668 vinfo = DR_GROUP_FIRST_ELEMENT (vinfo);
2669 bool single_element_p = !DR_GROUP_NEXT_ELEMENT (vinfo);
2670 size = DR_GROUP_SIZE (vinfo);
2671 vectype = STMT_VINFO_VECTYPE (vinfo);
2672 if (! vect_load_lanes_supported (vectype, size, false)
2673 && ! vect_grouped_load_supported (vectype, single_element_p,
2674 size))
2675 return opt_result::failure_at (vinfo->stmt,
2676 "unsupported grouped load\n");
2677 }
2678 }
2679
2680 if (dump_enabled_p ())
2681 dump_printf_loc (MSG_NOTE, vect_location,
2682 "re-trying with SLP disabled\n");
2683
2684 /* Roll back state appropriately. No SLP this time. */
2685 slp = false;
2686 /* Restore vectorization factor as it was without SLP. */
2687 LOOP_VINFO_VECT_FACTOR (loop_vinfo) = saved_vectorization_factor;
2688 /* Free the SLP instances. */
2689 FOR_EACH_VEC_ELT (LOOP_VINFO_SLP_INSTANCES (loop_vinfo), j, instance)
2690 vect_free_slp_instance (instance);
2691 LOOP_VINFO_SLP_INSTANCES (loop_vinfo).release ();
2692 /* Reset SLP type to loop_vect on all stmts. */
2693 for (i = 0; i < LOOP_VINFO_LOOP (loop_vinfo)->num_nodes; ++i)
2694 {
2695 basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i];
2696 for (gimple_stmt_iterator si = gsi_start_phis (bb);
2697 !gsi_end_p (si); gsi_next (&si))
2698 {
2699 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
2700 STMT_SLP_TYPE (stmt_info) = loop_vect;
2701 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def
2702 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def)
2703 {
2704 /* vectorizable_reduction adjusts reduction stmt def-types,
2705 restore them to that of the PHI. */
2706 STMT_VINFO_DEF_TYPE (STMT_VINFO_REDUC_DEF (stmt_info))
2707 = STMT_VINFO_DEF_TYPE (stmt_info);
2708 STMT_VINFO_DEF_TYPE (vect_stmt_to_vectorize
2709 (STMT_VINFO_REDUC_DEF (stmt_info)))
2710 = STMT_VINFO_DEF_TYPE (stmt_info);
2711 }
2712 }
2713 for (gimple_stmt_iterator si = gsi_start_bb (bb);
2714 !gsi_end_p (si); gsi_next (&si))
2715 {
2716 if (is_gimple_debug (gsi_stmt (si)))
2717 continue;
2718 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
2719 STMT_SLP_TYPE (stmt_info) = loop_vect;
2720 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
2721 {
2722 stmt_vec_info pattern_stmt_info
2723 = STMT_VINFO_RELATED_STMT (stmt_info);
2724 if (STMT_VINFO_SLP_VECT_ONLY_PATTERN (pattern_stmt_info))
2725 STMT_VINFO_IN_PATTERN_P (stmt_info) = false;
2726
2727 gimple *pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info);
2728 STMT_SLP_TYPE (pattern_stmt_info) = loop_vect;
2729 for (gimple_stmt_iterator pi = gsi_start (pattern_def_seq);
2730 !gsi_end_p (pi); gsi_next (&pi))
2731 STMT_SLP_TYPE (loop_vinfo->lookup_stmt (gsi_stmt (pi)))
2732 = loop_vect;
2733 }
2734 }
2735 }
2736 /* Free optimized alias test DDRS. */
2737 LOOP_VINFO_LOWER_BOUNDS (loop_vinfo).truncate (0);
2738 LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo).release ();
2739 LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo).release ();
2740 /* Reset target cost data. */
2741 delete loop_vinfo->vector_costs;
2742 loop_vinfo->vector_costs = nullptr;
2743 /* Reset accumulated rgroup information. */
2744 release_vec_loop_controls (&LOOP_VINFO_MASKS (loop_vinfo));
2745 release_vec_loop_controls (&LOOP_VINFO_LENS (loop_vinfo));
2746 /* Reset assorted flags. */
2747 LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = false;
2748 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = false;
2749 LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo) = 0;
2750 LOOP_VINFO_VERSIONING_THRESHOLD (loop_vinfo) = 0;
2751 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
2752 = saved_can_use_partial_vectors_p;
2753
2754 goto start_over;
2755}
2756
2757/* Return true if vectorizing a loop using NEW_LOOP_VINFO appears
2758 to be better than vectorizing it using OLD_LOOP_VINFO. Assume that
2759 OLD_LOOP_VINFO is better unless something specifically indicates
2760 otherwise.
2761
2762 Note that this deliberately isn't a partial order. */
2763
2764static bool
2765vect_better_loop_vinfo_p (loop_vec_info new_loop_vinfo,
2766 loop_vec_info old_loop_vinfo)
2767{
2768 struct loop *loop = LOOP_VINFO_LOOP (new_loop_vinfo);
2769 gcc_assert (LOOP_VINFO_LOOP (old_loop_vinfo) == loop);
2770
2771 poly_int64 new_vf = LOOP_VINFO_VECT_FACTOR (new_loop_vinfo);
2772 poly_int64 old_vf = LOOP_VINFO_VECT_FACTOR (old_loop_vinfo);
2773
2774 /* Always prefer a VF of loop->simdlen over any other VF. */
2775 if (loop->simdlen)
2776 {
2777 bool new_simdlen_p = known_eq (new_vf, loop->simdlen);
2778 bool old_simdlen_p = known_eq (old_vf, loop->simdlen);
2779 if (new_simdlen_p != old_simdlen_p)
2780 return new_simdlen_p;
2781 }
2782
2783 const auto *old_costs = old_loop_vinfo->vector_costs;
2784 const auto *new_costs = new_loop_vinfo->vector_costs;
2785 if (loop_vec_info main_loop = LOOP_VINFO_ORIG_LOOP_INFO (old_loop_vinfo))
2786 return new_costs->better_epilogue_loop_than_p (old_costs, main_loop);
2787
2788 return new_costs->better_main_loop_than_p (old_costs);
2789}
2790
2791/* Decide whether to replace OLD_LOOP_VINFO with NEW_LOOP_VINFO. Return
2792 true if we should. */
2793
2794static bool
2795vect_joust_loop_vinfos (loop_vec_info new_loop_vinfo,
2796 loop_vec_info old_loop_vinfo)
2797{
2798 if (!vect_better_loop_vinfo_p (new_loop_vinfo, old_loop_vinfo))
2799 return false;
2800
2801 if (dump_enabled_p ())
2802 dump_printf_loc (MSG_NOTE, vect_location,
2803 "***** Preferring vector mode %s to vector mode %s\n",
2804 GET_MODE_NAME (new_loop_vinfo->vector_mode),
2805 GET_MODE_NAME (old_loop_vinfo->vector_mode));
2806 return true;
2807}
2808
2809/* Analyze LOOP with VECTOR_MODES[MODE_I] and as epilogue if MAIN_LOOP_VINFO is
2810 not NULL. Set AUTODETECTED_VECTOR_MODE if VOIDmode and advance
2811 MODE_I to the next mode useful to analyze.
2812 Return the loop_vinfo on success and wrapped null on failure. */
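 Note that MODE_I is also advanced past any following vector modes that
 would give the same analysis result, so callers can simply retry with
 the returned MODE_I (see the skipping loops below).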
2813
2814static opt_loop_vec_info
2815vect_analyze_loop_1 (class loop *loop, vec_info_shared *shared,
2816 const vect_loop_form_info *loop_form_info,
2817 loop_vec_info main_loop_vinfo,
2818 const vector_modes &vector_modes, unsigned &mode_i,
2819 machine_mode &autodetected_vector_mode,
2820 bool &fatal)
2821{
2822 loop_vec_info loop_vinfo
2823 = vect_create_loop_vinfo (loop, shared, loop_form_info, main_loop_vinfo);
2824
2825 machine_mode vector_mode = vector_modes[mode_i];
2826 loop_vinfo->vector_mode = vector_mode;
2827
2828 /* Run the main analysis. */
2829 opt_result res = vect_analyze_loop_2 (loop_vinfo, fatal);
2830 if (dump_enabled_p ())
2831 dump_printf_loc (MSG_NOTE, vect_location,
2832 "***** Analysis %s with vector mode %s\n",
2833 res ? "succeeded" : " failed",
2834 GET_MODE_NAME (loop_vinfo->vector_mode));
2835
2836 /* Remember the autodetected vector mode. */
2837 if (vector_mode == VOIDmode)
2838 autodetected_vector_mode = loop_vinfo->vector_mode;
2839
2840 /* Advance mode_i, first skipping modes that would result in the
2841 same analysis result. */
2842 while (mode_i + 1 < vector_modes.length ()
2843 && vect_chooses_same_modes_p (loop_vinfo,
2844 vector_modes[mode_i + 1]))
2845 {
2846 if (dump_enabled_p ())
2847 dump_printf_loc (MSG_NOTE, vect_location,
2848 "***** The result for vector mode %s would"
2849 " be the same\n",
2850 GET_MODE_NAME (vector_modes[mode_i + 1]));
2851 mode_i += 1;
2852 }
2853 if (mode_i + 1 < vector_modes.length ()
2854 && VECTOR_MODE_P (autodetected_vector_mode)
2855 && (related_vector_mode (vector_modes[mode_i + 1],
2856 GET_MODE_INNER (autodetected_vector_mode))
2857 == autodetected_vector_mode)
2858 && (related_vector_mode (autodetected_vector_mode,
2859 GET_MODE_INNER (vector_modes[mode_i + 1]))
2860 == vector_modes[mode_i + 1]))
2861 {
2862 if (dump_enabled_p ())
2863 dump_printf_loc (MSG_NOTE, vect_location,
2864 "***** Skipping vector mode %s, which would"
2865 " repeat the analysis for %s\n",
2866 GET_MODE_NAME (vector_modes[mode_i + 1]),
2867 GET_MODE_NAME (autodetected_vector_mode));
2868 mode_i += 1;
2869 }
2870 mode_i++;
2871
2872 if (!res)
2873 {
2874 delete loop_vinfo;
2875 if (fatal)
2876 gcc_checking_assert (main_loop_vinfo == NULL);
2877 return opt_loop_vec_info::propagate_failure (res);
2878 }
2879
2880 return opt_loop_vec_info::success (loop_vinfo);
2881}
2882
2883/* Function vect_analyze_loop.
2884
2885 Apply a set of analyses on LOOP, and create a loop_vec_info struct
2886 for it. The different analyses will record information in the
2887 loop_vec_info struct. */
2888opt_loop_vec_info
2889vect_analyze_loop (class loop *loop, vec_info_shared *shared)
2890{
2891 DUMP_VECT_SCOPE ("analyze_loop_nest");
2892
2893 if (loop_outer (loop)
2894 && loop_vec_info_for_loop (loop_outer (loop))
2895 && LOOP_VINFO_VECTORIZABLE_P (loop_vec_info_for_loop (loop_outer (loop))))
2896 return opt_loop_vec_info::failure_at (vect_location,
2897 "outer-loop already vectorized.\n");
2898
2899 if (!find_loop_nest (loop, &shared->loop_nest))
2900 return opt_loop_vec_info::failure_at
2901 (vect_location,
2902 "not vectorized: loop nest containing two or more consecutive inner"
2903 " loops cannot be vectorized\n");
2904
2905 /* Analyze the loop form. */
2906 vect_loop_form_info loop_form_info;
2907 opt_result res = vect_analyze_loop_form (loop, &loop_form_info);
2908 if (!res)
2909 {
2910 if (dump_enabled_p ())
2911 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2912 "bad loop form.\n");
2913 return opt_loop_vec_info::propagate_failure (res);
2914 }
2915 if (!integer_onep (loop_form_info.assumptions))
2916 {
2917 /* We consider to vectorize this loop by versioning it under
2918 some assumptions. In order to do this, we need to clear
2919 existing information computed by scev and niter analyzer. */
2920 scev_reset_htab ();
2921 free_numbers_of_iterations_estimates (loop);
2922 /* Also set flag for this loop so that following scev and niter
2923 analysis are done under the assumptions. */
2924 loop_constraint_set (loop, LOOP_C_FINITE);
2925 }
2926
2927 auto_vector_modes vector_modes;
2928 /* Autodetect first vector size we try. */
2929 vector_modes.safe_push (VOIDmode);
2930 unsigned int autovec_flags
2931 = targetm.vectorize.autovectorize_vector_modes (&vector_modes,
2932 loop->simdlen != 0);
2933 bool pick_lowest_cost_p = ((autovec_flags & VECT_COMPARE_COSTS)
2934 && !unlimited_cost_model (loop));
2935 machine_mode autodetected_vector_mode = VOIDmode;
2936 opt_loop_vec_info first_loop_vinfo = opt_loop_vec_info::success (NULL);
2937 unsigned int mode_i = 0;
2938 unsigned int first_loop_i = 0;
2939 unsigned int first_loop_next_i = 0;
2940 unsigned HOST_WIDE_INT simdlen = loop->simdlen;
2941
2942 /* First determine the main loop vectorization mode, either the first
2943 one that works, starting with auto-detecting the vector mode and then
2944 following the targets order of preference, or the one with the
2945 lowest cost if pick_lowest_cost_p. */
2946 while (1)
2947 {
2948 unsigned int loop_vinfo_i = mode_i;
2949 bool fatal;
2950 opt_loop_vec_info loop_vinfo
2951 = vect_analyze_loop_1 (loop, shared, &loop_form_info,
2952 NULL, vector_modes, mode_i,
2953 autodetected_vector_mode, fatal);
2954 if (fatal)
2955 break;
2956
2957 if (loop_vinfo)
2958 {
2959 /* Once we hit the desired simdlen for the first time,
2960 discard any previous attempts. */
2961 if (simdlen
2962 && known_eq (LOOP_VINFO_VECT_FACTOR (loop_vinfo), simdlen))
2963 {
2964 delete first_loop_vinfo;
2965 first_loop_vinfo = opt_loop_vec_info::success (NULL);
2966 simdlen = 0;
2967 }
2968 else if (pick_lowest_cost_p
2969 && first_loop_vinfo
2970 && vect_joust_loop_vinfos (loop_vinfo, first_loop_vinfo))
2971 {
2972 /* Pick loop_vinfo over first_loop_vinfo. */
2973 delete first_loop_vinfo;
2974 first_loop_vinfo = opt_loop_vec_info::success (NULL);
2975 }
2976 if (first_loop_vinfo == NULL)
2977 {
2978 first_loop_vinfo = loop_vinfo;
2979 first_loop_i = loop_vinfo_i;
2980 first_loop_next_i = mode_i;
2981 }
2982 else
2983 {
2984 delete loop_vinfo;
2985 loop_vinfo = opt_loop_vec_info::success (NULL);
2986 }
2987
2988 /* Commit to first_loop_vinfo if we have no reason to try
2989 alternatives. */
2990 if (!simdlen && !pick_lowest_cost_p)
2991 break;
2992 }
2993 if (mode_i == vector_modes.length ()
2994 || autodetected_vector_mode == VOIDmode)
2995 break;
2996
2997 /* Try the next biggest vector size. */
2998 if (dump_enabled_p ())
2999 dump_printf_loc (MSG_NOTE, vect_location,
3000 "***** Re-trying analysis with vector mode %s\n",
3001 GET_MODE_NAME (vector_modes[mode_i]));
3002 }
3003 if (!first_loop_vinfo)
3004 return opt_loop_vec_info::propagate_failure (res);
3005
3006 if (dump_enabled_p ())
3007 dump_printf_loc (MSG_NOTE, vect_location,
3008 "***** Choosing vector mode %s\n",
3009 GET_MODE_NAME (first_loop_vinfo->vector_mode));
3010
3011 /* Only vectorize epilogues if PARAM_VECT_EPILOGUES_NOMASK is
3012 enabled, SIMDUID is not set, it is the innermost loop and we have
3013 either already found the loop's SIMDLEN or there was no SIMDLEN to
3014 begin with.
3015 TODO: Enable epilogue vectorization for loops with SIMDUID set. */
3016 bool vect_epilogues = (!simdlen
3017 && loop->inner == NULL
3018 && param_vect_epilogues_nomask
3019 && LOOP_VINFO_PEELING_FOR_NITER (first_loop_vinfo)
3020 && !loop->simduid);
3021 if (!vect_epilogues)
3022 return first_loop_vinfo;
3023
3024 /* Now analyze first_loop_vinfo for epilogue vectorization. */
3025 poly_uint64 lowest_th = LOOP_VINFO_VERSIONING_THRESHOLD (first_loop_vinfo);
3026
3027 /* Handle the case that the original loop can use partial
3028 vectorization, but want to only adopt it for the epilogue.
3029 The retry should be in the same mode as original. */
3030 if (LOOP_VINFO_EPIL_USING_PARTIAL_VECTORS_P (first_loop_vinfo))
3031 {
3032 gcc_assert (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (first_loop_vinfo)
3033 && !LOOP_VINFO_USING_PARTIAL_VECTORS_P (first_loop_vinfo));
3034 if (dump_enabled_p ())
3035 dump_printf_loc (MSG_NOTE, vect_location,
3036 "***** Re-trying analysis with same vector mode"
3037 " %s for epilogue with partial vectors.\n",
3038 GET_MODE_NAME (first_loop_vinfo->vector_mode));
3039 mode_i = first_loop_i;
3040 }
3041 else
3042 {
3043 mode_i = first_loop_next_i;
3044 if (mode_i == vector_modes.length ())
3045 return first_loop_vinfo;
3046 }
3047
3048 /* ??? If first_loop_vinfo was using VOIDmode then we probably
3049 want to instead search for the corresponding mode in vector_modes[]. */
3050
3051 while (1)
3052 {
3053 bool fatal;
3054 opt_loop_vec_info loop_vinfo
3055 = vect_analyze_loop_1 (loop, shared, &loop_form_info,
3056 first_loop_vinfo,
3057 vector_modes, mode_i,
3058 autodetected_vector_mode, fatal);
3059 if (fatal)
3060 break;
3061
3062 if (loop_vinfo)
3063 {
3064 if (pick_lowest_cost_p)
3065 {
3066 /* Keep trying to roll back vectorization attempts while the
3067 loop_vec_infos they produced were worse than this one. */
3068 vec<loop_vec_info> &vinfos = first_loop_vinfo->epilogue_vinfos;
3069 while (!vinfos.is_empty ()
3070 && vect_joust_loop_vinfos (loop_vinfo, vinfos.last ()))
3071 {
3072 gcc_assert (vect_epilogues);
3073 delete vinfos.pop ();
3074 }
3075 }
3076 /* For now only allow one epilogue loop. */
3077 if (first_loop_vinfo->epilogue_vinfos.is_empty ())
3078 {
3079 first_loop_vinfo->epilogue_vinfos.safe_push (loop_vinfo);
3080 poly_uint64 th = LOOP_VINFO_VERSIONING_THRESHOLD (loop_vinfo);
3081 gcc_assert (!LOOP_REQUIRES_VERSIONING (loop_vinfo)
3082 || maybe_ne (lowest_th, 0U));
3083 /* Keep track of the known smallest versioning
3084 threshold. */
3085 if (ordered_p (lowest_th, th))
3086 lowest_th = ordered_min (lowest_th, th);
3087 }
3088 else
3089 {
3090 delete loop_vinfo;
3091 loop_vinfo = opt_loop_vec_info::success (NULL);
3092 }
3093
3094 /* For now only allow one epilogue loop, but allow
3095 pick_lowest_cost_p to replace it, so commit to the
3096 first epilogue if we have no reason to try alternatives. */
3097 if (!pick_lowest_cost_p)
3098 break;
3099 }
3100
3101 if (mode_i == vector_modes.length ())
3102 break;
3103
3104 /* Try the next biggest vector size. */
3105 if (dump_enabled_p ())
3106 dump_printf_loc (MSG_NOTE, vect_location,
3107 "***** Re-trying epilogue analysis with vector "
3108 "mode %s\n", GET_MODE_NAME (vector_modes[mode_i]));
3109 }
3110
3111 if (!first_loop_vinfo->epilogue_vinfos.is_empty ())
3112 {
3113 LOOP_VINFO_VERSIONING_THRESHOLD (first_loop_vinfo) = lowest_th;
3114 if (dump_enabled_p ())
3115 dump_printf_loc (MSG_NOTE, vect_location,
3116 "***** Choosing epilogue vector mode %s\n",
3117 GET_MODE_NAME
3118 (first_loop_vinfo->epilogue_vinfos[0]->vector_mode));
3119 }
3120
3121 return first_loop_vinfo;
3122}
3123
3124/* Return true if there is an in-order reduction function for CODE, storing
3125 it in *REDUC_FN if so. */
3126
3127static bool
3128fold_left_reduction_fn (tree_code code, internal_fn *reduc_fn)
3129{
3130 switch (code)
3131 {
3132 case PLUS_EXPR:
3133 *reduc_fn = IFN_FOLD_LEFT_PLUS;
3134 return true;
3135
3136 default:
3137 return false;
3138 }
3139}
3140
3141/* Function reduction_fn_for_scalar_code
3142
3143 Input:
3144 CODE - tree_code of a reduction operation.
3145
3146 Output:
3147 REDUC_FN - the corresponding internal function to be used to reduce the
3148 vector of partial results into a single scalar result, or IFN_LAST
3149 if the operation is a supported reduction operation, but does not have
3150 such an internal function.
3151
3152 Return FALSE if CODE currently cannot be vectorized as reduction. */
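 /* For example, MAX_EXPR maps to IFN_REDUC_MAX and PLUS_EXPR to
 IFN_REDUC_PLUS, while MULT_EXPR and MINUS_EXPR are supported
 reductions without a dedicated internal function (IFN_LAST). */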
3153
3154bool
3155reduction_fn_for_scalar_code (enum tree_code code, internal_fn *reduc_fn)
3156{
3157 switch (code)
3158 {
3159 case MAX_EXPR:
3160 *reduc_fn = IFN_REDUC_MAX;
3161 return true;
3162
3163 case MIN_EXPR:
3164 *reduc_fn = IFN_REDUC_MIN;
3165 return true;
3166
3167 case PLUS_EXPR:
3168 *reduc_fn = IFN_REDUC_PLUS;
3169 return true;
3170
3171 case BIT_AND_EXPR:
3172 *reduc_fn = IFN_REDUC_AND;
3173 return true;
3174
3175 case BIT_IOR_EXPR:
3176 *reduc_fn = IFN_REDUC_IOR;
3177 return true;
3178
3179 case BIT_XOR_EXPR:
3180 *reduc_fn = IFN_REDUC_XOR;
3181 return true;
3182
3183 case MULT_EXPR:
3184 case MINUS_EXPR:
3185 *reduc_fn = IFN_LAST;
3186 return true;
3187
3188 default:
3189 return false;
3190 }
3191}
3192
3193/* If there is a neutral value X such that a reduction would not be affected
3194 by the introduction of additional X elements, return that X, otherwise
3195 return null. CODE is the code of the reduction and SCALAR_TYPE is type
3196 of the scalar elements. If the reduction has just a single initial value
3197 then INITIAL_VALUE is that value, otherwise it is null. */
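 /* For instance, the neutral value is 0 for PLUS_EXPR and BIT_IOR_EXPR,
 1 for MULT_EXPR, all-ones for BIT_AND_EXPR, and the single initial
 value itself for MIN_EXPR and MAX_EXPR. */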
3198
3199tree
3200neutral_op_for_reduction (tree scalar_type, tree_code code, tree initial_value)
3201{
3202 switch (code)
3203 {
3204 case WIDEN_SUM_EXPR:
3205 case DOT_PROD_EXPR:
3206 case SAD_EXPR:
3207 case PLUS_EXPR:
3208 case MINUS_EXPR:
3209 case BIT_IOR_EXPR:
3210 case BIT_XOR_EXPR:
3211 return build_zero_cst (scalar_type);
3212
3213 case MULT_EXPR:
3214 return build_one_cst (scalar_type);
3215
3216 case BIT_AND_EXPR:
3217 return build_all_ones_cst (scalar_type);
3218
3219 case MAX_EXPR:
3220 case MIN_EXPR:
3221 return initial_value;
3222
3223 default:
3224 return NULL_TREE;
3225 }
3226}
3227
3228/* Error reporting helper for vect_is_simple_reduction below. GIMPLE statement
3229 STMT is printed with a message MSG. */
3230
3231static void
3232report_vect_op (dump_flags_t msg_type, gimple *stmt, const char *msg)
3233{
3234 dump_printf_loc (msg_type, vect_location, "%s%G", msg, stmt);
3235}
3236
3237/* Return true if we need an in-order reduction for operation CODE
3238 on type TYPE. NEED_WRAPPING_INTEGRAL_OVERFLOW is true if integer
3239 overflow must wrap. */
3240
3241bool
3242needs_fold_left_reduction_p (tree type, tree_code code)
3243{
3244 /* CHECKME: check for !flag_finite_math_only too? */
3245 if (SCALAR_FLOAT_TYPE_P (type))
3246 switch (code)
3247 {
3248 case MIN_EXPR:
3249 case MAX_EXPR:
3250 return false;
3251
3252 default:
3253 return !flag_associative_math;
3254 }
3255
3256 if (INTEGRAL_TYPE_P (type))
3257 {
3258 if (!operation_no_trapping_overflow (type, code))
3259 return true;
3260 return false;
3261 }
3262
3263 if (SAT_FIXED_POINT_TYPE_P (type))
3264 return true;
3265
3266 return false;
3267}
3268
3269/* Return true if the reduction PHI in LOOP with latch arg LOOP_ARG
3270 has a handled computation expression. Store the main reduction
3271 operation in *CODE. */
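 /* For example, for a simple sum reduction
 sum_1 = PHI <init, sum_2>; ... sum_2 = sum_1 + a[i];
 the walk from the latch definition sum_2 back to the PHI result is
 recorded in PATH and *CODE is set to PLUS_EXPR. */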
3272
3273static bool
3274check_reduction_path (dump_user_location_t loc, loop_p loop, gphi *phi,
3275 tree loop_arg, enum tree_code *code,
3276 vec<std::pair<ssa_op_iter, use_operand_p> > &path)
3277{
3278 auto_bitmap visited;
3279 tree lookfor = PHI_RESULT (phi);
3280 ssa_op_iter curri;
3281 use_operand_p curr = op_iter_init_phiuse (&curri, phi, SSA_OP_USE);
3282 while (USE_FROM_PTR (curr) != loop_arg)
3283 curr = op_iter_next_use (&curri);
3284 curri.i = curri.numops;
3285 do
3286 {
3287 path.safe_push (std::make_pair (curri, curr));
3288 tree use = USE_FROM_PTR (curr);
3289 if (use == lookfor)
3290 break;
3291 gimple *def = SSA_NAME_DEF_STMT (use);
3292 if (gimple_nop_p (def)
3293 || ! flow_bb_inside_loop_p (loop, gimple_bb (def)))
3294 {
3295pop:
3296 do
3297 {
3298 std::pair<ssa_op_iter, use_operand_p> x = path.pop ();
3299 curri = x.first;
3300 curr = x.second;
3301 do
3302 curr = op_iter_next_use (&curri);
3303 /* Skip already visited or non-SSA operands (from iterating
3304 over PHI args). */
3305 while (curr != NULL_USE_OPERAND_P
3306 && (TREE_CODE (USE_FROM_PTR (curr)) != SSA_NAME
3307 || ! bitmap_set_bit (visited,
3308 SSA_NAME_VERSION
3309 (USE_FROM_PTR (curr)))));
3310 }
3311 while (curr == NULL_USE_OPERAND_P && ! path.is_empty ());
3312 if (curr == NULL_USE_OPERAND_P)
3313 break;
3314 }
3315 else
3316 {
3317 if (gimple_code (def) == GIMPLE_PHI)
3318 curr = op_iter_init_phiuse (&curri, as_a <gphi *>(def), SSA_OP_USE);
3319 else
3320 curr = op_iter_init_use (&curri, def, SSA_OP_USE);
3321 while (curr != NULL_USE_OPERAND_P
3322 && (TREE_CODE (USE_FROM_PTR (curr)) != SSA_NAME
3323 || ! bitmap_set_bit (visited,
3324 SSA_NAME_VERSION
3325 (USE_FROM_PTR (curr)))))
3326 curr = op_iter_next_use (&curri);
3327 if (curr == NULL_USE_OPERAND_P)
3328 goto pop;
3329 }
3330 }
3331 while (1);
3332 if (dump_file && (dump_flags & TDF_DETAILS))
3333 {
3334 dump_printf_loc (MSG_NOTE, loc, "reduction path: ");
3335 unsigned i;
3336 std::pair<ssa_op_iter, use_operand_p> *x;
3337 FOR_EACH_VEC_ELT (path, i, x)
3338 dump_printf (MSG_NOTE, "%T ", USE_FROM_PTR (x->second));
3339 dump_printf (MSG_NOTE, "\n");
3340 }
3341
3342 /* Check whether the reduction path detected is valid. */
3343 bool fail = path.length () == 0;
3344 bool neg = false;
3345 int sign = -1;
3346 *code = ERROR_MARK;
3347 for (unsigned i = 1; i < path.length (); ++i)
3348 {
3349 gimple *use_stmt = USE_STMT (path[i].second);
3350 tree op = USE_FROM_PTR (path[i].second);
3351 if (! is_gimple_assign (use_stmt)
3352 /* The following make sure we can compute the operand index
3353 easily plus it mostly disallows chaining via COND_EXPR condition
3354 operands. */
3355 || (gimple_assign_rhs1_ptr (use_stmt) != path[i].second->use
3356 && (gimple_num_ops (use_stmt) <= 2
3357 || gimple_assign_rhs2_ptr (use_stmt) != path[i].second->use)
3358 && (gimple_num_ops (use_stmt) <= 3
3359 || gimple_assign_rhs3_ptr (use_stmt) != path[i].second->use)))
3360 {
3361 fail = true;
3362 break;
3363 }
3364 tree_code use_code = gimple_assign_rhs_code (use_stmt);
3365 if (use_code == MINUS_EXPR)
3366 {
3367 use_code = PLUS_EXPR;
3368 /* Track whether we negate the reduction value each iteration. */
3369 if (gimple_assign_rhs2 (use_stmt) == op)
3370 neg = ! neg;
3371 }
3372 if (CONVERT_EXPR_CODE_P (use_code)
3373 && tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (use_stmt)),
3374 TREE_TYPE (gimple_assign_rhs1 (use_stmt))))
3375 ;
3376 else if (*code == ERROR_MARK)
3377 {
3378 *code = use_code;
3379 sign = TYPE_SIGN (TREE_TYPE (gimple_assign_lhs (use_stmt)));
3380 }
3381 else if (use_code != *code)
3382 {
3383 fail = true;
3384 break;
3385 }
3386 else if ((use_code == MIN_EXPR
3387 || use_code == MAX_EXPR)
3388 && sign != TYPE_SIGN (TREE_TYPE (gimple_assign_lhs (use_stmt))))
3389 {
3390 fail = true;
3391 break;
3392 }
3393 /* Check there's only a single stmt the op is used on. For the
3394 not value-changing tail and the last stmt allow out-of-loop uses.
3395 ??? We could relax this and handle arbitrary live stmts by
3396 forcing a scalar epilogue for example. */
3397 imm_use_iterator imm_iter;
3398 gimple *op_use_stmt;
3399 unsigned cnt = 0;
3400 FOR_EACH_IMM_USE_STMT (op_use_stmt, imm_iter, op)
3401 if (!is_gimple_debug (op_use_stmt)
3402 && (*code != ERROR_MARK
3403 || flow_bb_inside_loop_p (loop, gimple_bb (op_use_stmt))))
3404 {
3405 /* We want to allow x + x but not x < 1 ? x : 2. */
3406 if (is_gimple_assign (op_use_stmt)
3407 && gimple_assign_rhs_code (op_use_stmt) == COND_EXPR)
3408 {
3409 use_operand_p use_p;
3410 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
3411 cnt++;
3412 }
3413 else
3414 cnt++;
3415 }
3416 if (cnt != 1)
3417 {
3418 fail = true;
3419 break;
3420 }
3421 }
3422 return ! fail && ! neg && *code != ERROR_MARK;
3423}
3424
3425bool
3426check_reduction_path (dump_user_location_t loc, loop_p loop, gphi *phi,
3427 tree loop_arg, enum tree_code code)
3428{
3429 auto_vec<std::pair<ssa_op_iter, use_operand_p> > path;
3430 enum tree_code code_;
3431 return (check_reduction_path (loc, loop, phi, loop_arg, &code_, path)
3432 && code_ == code);
3433}
3434
3435
3436
3437/* Function vect_is_simple_reduction
3438
3439 (1) Detect a cross-iteration def-use cycle that represents a simple
3440 reduction computation. We look for the following pattern:
3441
3442 loop_header:
3443 a1 = phi < a0, a2 >
3444 a3 = ...
3445 a2 = operation (a3, a1)
3446
3447 or
3448
3449 a3 = ...
3450 loop_header:
3451 a1 = phi < a0, a2 >
3452 a2 = operation (a3, a1)
3453
3454 such that:
3455 1. operation is commutative and associative and it is safe to
3456 change the order of the computation
3457 2. no uses for a2 in the loop (a2 is used out of the loop)
3458 3. no uses of a1 in the loop besides the reduction operation
3459 4. no uses of a1 outside the loop.
3460
3461 Conditions 1,4 are tested here.
3462 Conditions 2,3 are tested in vect_mark_stmts_to_be_vectorized.
3463
3464 (2) Detect a cross-iteration def-use cycle in nested loops, i.e.,
3465 nested cycles.
3466
3467 (3) Detect cycles of phi nodes in outer-loop vectorization, i.e., double
3468 reductions:
3469
3470 a1 = phi < a0, a2 >
3471 inner loop (def of a3)
3472 a2 = phi < a3 >
3473
3474 (4) Detect condition expressions, ie:
3475 for (int i = 0; i < N; i++)
3476 if (a[i] < val)
3477 ret_val = a[i];
3478
3479*/
3480
3481static stmt_vec_info
3482vect_is_simple_reduction (loop_vec_info loop_info, stmt_vec_info phi_info,
3483 bool *double_reduc, bool *reduc_chain_p)
3484{
3485 gphi *phi = as_a <gphi *> (phi_info->stmt);
3486 gimple *phi_use_stmt = NULL;
3487 imm_use_iterator imm_iter;
3488 use_operand_p use_p;
3489
3490 *double_reduc = false;
3491 *reduc_chain_p = false;
3492 STMT_VINFO_REDUC_TYPE (phi_info) = TREE_CODE_REDUCTION;
3493
3494 tree phi_name = PHI_RESULT (phi);
3495 /* ??? If there are no uses of the PHI result the inner loop reduction
3496 won't be detected as possibly double-reduction by vectorizable_reduction
3497 because that tries to walk the PHI arg from the preheader edge which
3498 can be constant. See PR60382. */
3499 if (has_zero_uses (phi_name))
3500 return NULL;
3501 class loop *loop = (gimple_bb (phi))->loop_father;
3502 unsigned nphi_def_loop_uses = 0;
3503 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, phi_name)
3504 {
3505 gimple *use_stmt = USE_STMT (use_p);
3506 if (is_gimple_debug (use_stmt))
3507 continue;
3508
3509 if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
3510 {
3511 if (dump_enabled_p ())
3512 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3513 "intermediate value used outside loop.\n");
3514
3515 return NULL;
3516 }
3517
3518 nphi_def_loop_uses++;
3519 phi_use_stmt = use_stmt;
3520 }
3521
3522 tree latch_def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
3523 if (TREE_CODE (latch_def) != SSA_NAME)
3524 {
3525 if (dump_enabled_p ())
3526 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3527 "reduction: not ssa_name: %T\n", latch_def);
3528 return NULL;
3529 }
3530
3531 stmt_vec_info def_stmt_info = loop_info->lookup_def (latch_def);
3532 if (!def_stmt_info
3533 || !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt_info->stmt)))
3534 return NULL;
3535
3536 bool nested_in_vect_loop
3537 = flow_loop_nested_p (LOOP_VINFO_LOOP (loop_info), loop);
3538 unsigned nlatch_def_loop_uses = 0;
3539 auto_vec<gphi *, 3> lcphis;
3540 bool inner_loop_of_double_reduc = false;
3541 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, latch_def)
3542 {
3543 gimple *use_stmt = USE_STMT (use_p);
3544 if (is_gimple_debug (use_stmt))
3545 continue;
3546 if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
3547 nlatch_def_loop_uses++;
3548 else
3549 {
3550 /* We can have more than one loop-closed PHI. */
3551 lcphis.safe_push (as_a <gphi *> (use_stmt));
3552 if (nested_in_vect_loop
3553 && (STMT_VINFO_DEF_TYPE (loop_info->lookup_stmt (use_stmt))
3554 == vect_double_reduction_def))
3555 inner_loop_of_double_reduc = true;
3556 }
3557 }
3558
3559 /* If we are vectorizing an inner reduction we are executing that
3560 in the original order only in case we are not dealing with a
3561 double reduction. */
3562 if (nested_in_vect_loop && !inner_loop_of_double_reduc)
3563 {
3564 if (dump_enabled_p ())
3565 report_vect_op (MSG_NOTE, def_stmt_info->stmt,
3566 "detected nested cycle: ");
3567 return def_stmt_info;
3568 }
3569
3570 /* When the inner loop of a double reduction ends up with more than
3571 one loop-closed PHI we have failed to classify alternate such
3572 PHIs as double reduction, leading to wrong code. See PR103237. */
3573 if (inner_loop_of_double_reduc && lcphis.length () != 1)
3574 {
3575 if (dump_enabled_p ())
3576 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3577 "unhandled double reduction\n");
3578 return NULL;
3579 }
3580
3581 /* If this isn't a nested cycle or if the nested cycle reduction value
3582 is used outside of the inner loop we cannot handle uses of the reduction
3583 value. */
3584 if (nlatch_def_loop_uses > 1 || nphi_def_loop_uses > 1)
3585 {
3586 if (dump_enabled_p ())
3587 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3588 "reduction used in loop.\n");
3590 return NULL;
3590 }
3591
3592 /* If DEF_STMT is a phi node itself, we expect it to have a single argument
3593 defined in the inner loop. */
3594 if (gphi *def_stmt = dyn_cast <gphi *> (def_stmt_info->stmt))
3595 {
3596 tree op1 = PHI_ARG_DEF (def_stmt, 0);
3597 if (gimple_phi_num_args (def_stmt) != 1
3598 || TREE_CODE (op1) != SSA_NAME)
3599 {
3600 if (dump_enabled_p ())
3601 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3602 "unsupported phi node definition.\n");
3603
3604 return NULL;
3605 }
3606
3607 gimple *def1 = SSA_NAME_DEF_STMT (op1);
3608 if (gimple_bb (def1)
3609 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
3610 && loop->inner
3611 && flow_bb_inside_loop_p (loop->inner, gimple_bb (def1))
3612 && is_gimple_assign (def1)
3613 && is_a <gphi *> (phi_use_stmt)
3614 && flow_bb_inside_loop_p (loop->inner, gimple_bb (phi_use_stmt)))
3615 {
3616 if (dump_enabled_p ())
3617 report_vect_op (MSG_NOTE, def_stmt,
3618 "detected double reduction: ");
3619
3620 *double_reduc = true;
3621 return def_stmt_info;
3622 }
3623
3624 return NULL;
3625 }
3626
3627 /* Look for the expression computing latch_def from the loop PHI result. */
3628 auto_vec<std::pair<ssa_op_iter, use_operand_p> > path;
3629 enum tree_code code;
3630 if (check_reduction_path (vect_location, loop, phi, latch_def, &code,
3631 path))
3632 {
3633 STMT_VINFO_REDUC_CODE (phi_info) = code;
3634 if (code == COND_EXPR && !nested_in_vect_loop)
3635 STMT_VINFO_REDUC_TYPE (phi_info) = COND_REDUCTION;
3636
3637 /* Fill in STMT_VINFO_REDUC_IDX and gather stmts for an SLP
3638 reduction chain for which the additional restriction is that
3639 all operations in the chain are the same. */
3640 auto_vec<stmt_vec_info, 8> reduc_chain;
3641 unsigned i;
3642 bool is_slp_reduc = !nested_in_vect_loop && code != COND_EXPR;
3643 for (i = path.length () - 1; i >= 1; --i)
3644 {
3645 gimple *stmt = USE_STMT (path[i].second);
3646 stmt_vec_info stmt_info = loop_info->lookup_stmt (stmt);
3647 STMT_VINFO_REDUC_IDX (stmt_info)
3648 = path[i].second->use - gimple_assign_rhs1_ptr (stmt);
3649 enum tree_code stmt_code = gimple_assign_rhs_code (stmt);
3650 bool leading_conversion = (CONVERT_EXPR_CODE_P (stmt_code)
3651 && (i == 1 || i == path.length () - 1));
3652 if ((stmt_code != code && !leading_conversion)
3653 /* We can only handle the final value in epilogue
3654 generation for reduction chains. */
3655 || (i != 1 && !has_single_use (gimple_assign_lhs (stmt))))
3656 is_slp_reduc = false;
3657 /* For reduction chains we support trailing/leading
3658 conversions. We do not store those in the actual chain. */
3659 if (leading_conversion)
3660 continue;
3661 reduc_chain.safe_push (stmt_info);
3662 }
3663 if (is_slp_reduc && reduc_chain.length () > 1)
3664 {
3665 for (unsigned i = 0; i < reduc_chain.length () - 1; ++i)
3666 {
3667 REDUC_GROUP_FIRST_ELEMENT (reduc_chain[i]) = reduc_chain[0];
3668 REDUC_GROUP_NEXT_ELEMENT (reduc_chain[i]) = reduc_chain[i+1];
3669 }
3670 REDUC_GROUP_FIRST_ELEMENT (reduc_chain.last ()) = reduc_chain[0];
3671 REDUC_GROUP_NEXT_ELEMENT (reduc_chain.last ()) = NULL;
3672
3673 /* Save the chain for further analysis in SLP detection. */
3674 LOOP_VINFO_REDUCTION_CHAINS (loop_info)(loop_info)->reduction_chains.safe_push (reduc_chain[0]);
3675 REDUC_GROUP_SIZE (reduc_chain[0]) = reduc_chain.length ();
3676
3677 *reduc_chain_p = true;
3678 if (dump_enabled_p ())
3679 dump_printf_loc (MSG_NOTE, vect_location,
3680 "reduction: detected reduction chain\n");
3681 }
3682 else if (dump_enabled_p ())
3683 dump_printf_loc (MSG_NOTE, vect_location,
3684 "reduction: detected reduction\n");
3685
3686 return def_stmt_info;
3687 }
3688
3689 if (dump_enabled_p ())
3690 dump_printf_loc (MSG_NOTE, vect_location,
3691 "reduction: unknown pattern\n");
3692
3693 return NULLnullptr;
3694}
3695
3696/* Estimate the number of peeled epilogue iterations for LOOP_VINFO.
3697 PEEL_ITERS_PROLOGUE is the number of peeled prologue iterations,
3698 or -1 if not known. */
3699
3700static int
3701vect_get_peel_iters_epilogue (loop_vec_info loop_vinfo, int peel_iters_prologue)
3702{
3703 int assumed_vf = vect_vf_for_cost (loop_vinfo);
3704 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)(tree_fits_shwi_p ((loop_vinfo)->num_iters) && tree_to_shwi
((loop_vinfo)->num_iters) > 0)
|| peel_iters_prologue == -1)
3705 {
3706 if (dump_enabled_p ())
3707 dump_printf_loc (MSG_NOTE, vect_location,
3708 "cost model: epilogue peel iters set to vf/2 "
3709 "because loop iterations are unknown .\n");
3710 return assumed_vf / 2;
3711 }
3712 else
3713 {
3714 int niters = LOOP_VINFO_INT_NITERS (loop_vinfo);
3715 peel_iters_prologue = MIN (niters, peel_iters_prologue);
3716 int peel_iters_epilogue = (niters - peel_iters_prologue) % assumed_vf;
3717 /* If we need to peel for gaps, but no peeling is required, we have to
3718 peel VF iterations. */
3719 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)(loop_vinfo)->peeling_for_gaps && !peel_iters_epilogue)
3720 peel_iters_epilogue = assumed_vf;
3721 return peel_iters_epilogue;
3722 }
3723}
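/* Editorial sketch (not part of the analyzed GCC source): the arithmetic
   above in isolation, assuming a compile-time-known trip count.  With
   niters = 100, vf = 8 and peel_iters_prologue = 3, the epilogue gets
   (100 - 3) % 8 = 1 iteration; if peeling for gaps is required and the
   remainder happens to be 0, a full vf iterations are peeled instead.  */
static int
peel_iters_epilogue_example (int niters, int vf, int prologue, bool gaps)
{
  prologue = niters < prologue ? niters : prologue;
  int epilogue = (niters - prologue) % vf;
  if (gaps && epilogue == 0)
    epilogue = vf;
  return epilogue; /* 1 for the example values above.  */
}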
3724
3725/* Calculate cost of peeling the loop PEEL_ITERS_PROLOGUE times. */
3726int
3727vect_get_known_peeling_cost (loop_vec_info loop_vinfo, int peel_iters_prologue,
3728 int *peel_iters_epilogue,
3729 stmt_vector_for_cost *scalar_cost_vec,
3730 stmt_vector_for_cost *prologue_cost_vec,
3731 stmt_vector_for_cost *epilogue_cost_vec)
3732{
3733 int retval = 0;
3734
3735 *peel_iters_epilogue
3736 = vect_get_peel_iters_epilogue (loop_vinfo, peel_iters_prologue);
3737
3738 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
3739 {
3740 /* If peeled iterations are known but the number of scalar loop
3741 iterations is unknown, count a taken branch per peeled loop. */
3742 if (peel_iters_prologue > 0)
3743 retval = record_stmt_cost (prologue_cost_vec, 1, cond_branch_taken,
3744 NULLnullptr, NULL_TREE(tree) nullptr, 0, vect_prologue);
3745 if (*peel_iters_epilogue > 0)
3746 retval += record_stmt_cost (epilogue_cost_vec, 1, cond_branch_taken,
3747 NULLnullptr, NULL_TREE(tree) nullptr, 0, vect_epilogue);
3748 }
3749
3750 stmt_info_for_cost *si;
3751 int j;
3752 if (peel_iters_prologue)
3753 FOR_EACH_VEC_ELT (*scalar_cost_vec, j, si)
3754 retval += record_stmt_cost (prologue_cost_vec,
3755 si->count * peel_iters_prologue,
3756 si->kind, si->stmt_info, si->misalign,
3757 vect_prologue);
3758 if (*peel_iters_epilogue)
3759 FOR_EACH_VEC_ELT (*scalar_cost_vec, j, si)
3760 retval += record_stmt_cost (epilogue_cost_vec,
3761 si->count * *peel_iters_epilogue,
3762 si->kind, si->stmt_info, si->misalign,
3763 vect_epilogue);
3764
3765 return retval;
3766}
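/* Editorial note (not part of the analyzed GCC source): in the loops above
   each scalar statement is charged once per peeled iteration, so a scalar
   body of 5 statements peeled 3 times in the prologue contributes
   5 * 3 = 15 prologue statement costs, plus one taken-branch cost per
   peeled loop when the scalar trip count is unknown.  */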
3767
3768/* Function vect_estimate_min_profitable_iters
3769
3770 Return the number of iterations required for the vector version of the
3771 loop to be profitable relative to the cost of the scalar version of the
3772 loop.
3773
3774 *RET_MIN_PROFITABLE_NITERS is a cost model profitability threshold
3775 of iterations for vectorization. -1 value means loop vectorization
3776 is not profitable. This returned value may be used for dynamic
3777 profitability check.
3778
3779 *RET_MIN_PROFITABLE_ESTIMATE is a profitability threshold to be used
3780 for static check against estimated number of iterations. */
3781
3782static void
3783vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
3784 int *ret_min_profitable_niters,
3785 int *ret_min_profitable_estimate)
3786{
3787 int min_profitable_iters;
3788 int min_profitable_estimate;
3789 int peel_iters_prologue;
3790 int peel_iters_epilogue;
3791 unsigned vec_inside_cost = 0;
3792 int vec_outside_cost = 0;
3793 unsigned vec_prologue_cost = 0;
3794 unsigned vec_epilogue_cost = 0;
3795 int scalar_single_iter_cost = 0;
3796 int scalar_outside_cost = 0;
3797 int assumed_vf = vect_vf_for_cost (loop_vinfo);
3798 int npeel = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)(loop_vinfo)->peeling_for_alignment;
3799 vector_costs *target_cost_data = loop_vinfo->vector_costs;
3800
3801 /* Cost model disabled. */
3802 if (unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo)(loop_vinfo)->loop))
3803 {
3804 if (dump_enabled_p ())
3805 dump_printf_loc (MSG_NOTE, vect_location, "cost model disabled.\n");
3806 *ret_min_profitable_niters = 0;
3807 *ret_min_profitable_estimate = 0;
3808 return;
3809 }
3810
3811 /* Requires loop versioning tests to handle misalignment. */
3812 if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)((loop_vinfo)->may_misalign_stmts.length () > 0))
3813 {
3814 /* FIXME: Make cost depend on complexity of individual check. */
3815 unsigned len = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)(loop_vinfo)->may_misalign_stmts.length ();
3816 (void) add_stmt_cost (target_cost_data, len, vector_stmt,
3817 NULLnullptr, NULL_TREE(tree) nullptr, 0, vect_prologue);
3818 if (dump_enabled_p ())
3819 dump_printf (MSG_NOTE,
3820 "cost model: Adding cost of checks for loop "
3821 "versioning to treat misalignment.\n");
3822 }
3823
3824 /* Requires loop versioning with alias checks. */
3825 if (LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
3826 {
3827 /* FIXME: Make cost depend on complexity of individual check. */
3828 unsigned len = LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo)(loop_vinfo)->comp_alias_ddrs.length ();
3829 (void) add_stmt_cost (target_cost_data, len, vector_stmt,
3830 NULLnullptr, NULL_TREE(tree) nullptr, 0, vect_prologue);
3831 len = LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo)(loop_vinfo)->check_unequal_addrs.length ();
3832 if (len)
3833 /* Count LEN - 1 ANDs and LEN comparisons. */
3834 (void) add_stmt_cost (target_cost_data, len * 2 - 1,
3835 scalar_stmt, NULLnullptr, NULL_TREE(tree) nullptr, 0, vect_prologue);
3836 len = LOOP_VINFO_LOWER_BOUNDS (loop_vinfo)(loop_vinfo)->lower_bounds.length ();
3837 if (len)
3838 {
3839 /* Count LEN - 1 ANDs and LEN comparisons. */
3840 unsigned int nstmts = len * 2 - 1;
3841 /* +1 for each bias that needs adding. */
3842 for (unsigned int i = 0; i < len; ++i)
3843 if (!LOOP_VINFO_LOWER_BOUNDS (loop_vinfo)(loop_vinfo)->lower_bounds[i].unsigned_p)
3844 nstmts += 1;
3845 (void) add_stmt_cost (target_cost_data, nstmts,
3846 scalar_stmt, NULLnullptr, NULL_TREE(tree) nullptr, 0, vect_prologue);
3847 }
3848 if (dump_enabled_p ())
3849 dump_printf (MSG_NOTE,
3850 "cost model: Adding cost of checks for loop "
3851 "versioning aliasing.\n");
3852 }
3853
3854 /* Requires loop versioning with niter checks. */
3855 if (LOOP_REQUIRES_VERSIONING_FOR_NITERS (loop_vinfo)((loop_vinfo)->num_iters_assumptions))
3856 {
3857 /* FIXME: Make cost depend on complexity of individual check. */
3858 (void) add_stmt_cost (target_cost_data, 1, vector_stmt,
3859 NULLnullptr, NULL_TREE(tree) nullptr, 0, vect_prologue);
3860 if (dump_enabled_p ())
3861 dump_printf (MSG_NOTE,
3862 "cost model: Adding cost of checks for loop "
3863 "versioning niters.\n");
3864 }
3865
3866 if (LOOP_REQUIRES_VERSIONING (loop_vinfo))
3867 (void) add_stmt_cost (target_cost_data, 1, cond_branch_taken,
3868 NULLnullptr, NULL_TREE(tree) nullptr, 0, vect_prologue);
3869
3870 /* Count statements in scalar loop. Using this as scalar cost for a single
3871 iteration for now.
3872
3873 TODO: Add outer loop support.
3874
3875 TODO: Consider assigning different costs to different scalar
3876 statements. */
3877
3878 scalar_single_iter_cost = loop_vinfo->scalar_costs->total_cost ();
3879
3880 /* Add additional cost for the peeled instructions in prologue and epilogue
3881 loop. (For fully-masked loops there will be no peeling.)
3882
3883 FORNOW: If we don't know the value of peel_iters for prologue or epilogue
3884 at compile-time - we assume it's vf/2 (the worst would be vf-1).
3885
3886 TODO: Build an expression that represents peel_iters for prologue and
3887 epilogue to be used in a run-time test. */
3888
3889 bool prologue_need_br_taken_cost = false;
3890 bool prologue_need_br_not_taken_cost = false;
3891
3892 /* Calculate peel_iters_prologue. */
3893 if (vect_use_loop_mask_for_alignment_p (loop_vinfo))
3894 peel_iters_prologue = 0;
3895 else if (npeel < 0)
3896 {
3897 peel_iters_prologue = assumed_vf / 2;
3898 if (dump_enabled_p ())
3899 dump_printf (MSG_NOTE, "cost model: "
3900 "prologue peel iters set to vf/2.\n");
3901
3902 /* If peeled iterations are unknown, count a taken branch and a not taken
3903 branch per peeled loop. Even if scalar loop iterations are known,
3904 vector iterations are not known since peeled prologue iterations are
3905 not known. Hence guards remain the same. */
3906 prologue_need_br_taken_cost = true;
3907 prologue_need_br_not_taken_cost = true;
3908 }
3909 else
3910 {
3911 peel_iters_prologue = npeel;
3912 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) && peel_iters_prologue > 0)
3913 /* If peeled iterations are known but the number of scalar loop
3914 iterations is unknown, count a taken branch per peeled loop. */
3915 prologue_need_br_taken_cost = true;
3916 }
3917
3918 bool epilogue_need_br_taken_cost = false;
3919 bool epilogue_need_br_not_taken_cost = false;
3920
3921 /* Calculate peel_iters_epilogue. */
3922 if (LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)(loop_vinfo)->using_partial_vectors_p)
3923 /* We need to peel exactly one iteration for gaps. */
3924 peel_iters_epilogue = LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)(loop_vinfo)->peeling_for_gaps ? 1 : 0;
3925 else if (npeel < 0)
3926 {
3927 /* If peeling for alignment is unknown, loop bound of main loop
3928 becomes unknown. */
3929 peel_iters_epilogue = assumed_vf / 2;
3930 if (dump_enabled_p ())
3931 dump_printf (MSG_NOTE, "cost model: "
3932 "epilogue peel iters set to vf/2 because "
3933 "peeling for alignment is unknown.\n");
3934
3935 /* See the same reason above in peel_iters_prologue calculation. */
3936 epilogue_need_br_taken_cost = true;
3937 epilogue_need_br_not_taken_cost = true;
3938 }
3939 else
3940 {
3941 peel_iters_epilogue = vect_get_peel_iters_epilogue (loop_vinfo, npeel);
3942 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) && peel_iters_epilogue > 0)
3943 /* If peeled iterations are known but the number of scalar loop
3944 iterations is unknown, count a taken branch per peeled loop. */
3945 epilogue_need_br_taken_cost = true;
3946 }
3947
3948 stmt_info_for_cost *si;
3949 int j;
3950 /* Add costs associated with peel_iters_prologue. */
3951 if (peel_iters_prologue)
3952 FOR_EACH_VEC_ELT (LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo), j, si)
3953 {
3954 (void) add_stmt_cost (target_cost_data,
3955 si->count * peel_iters_prologue, si->kind,
3956 si->stmt_info, si->vectype, si->misalign,
3957 vect_prologue);
3958 }
3959
3960 /* Add costs associated with peel_iters_epilogue. */
3961 if (peel_iters_epilogue)
3962 FOR_EACH_VEC_ELT (LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo), j, si)
3963 {
3964 (void) add_stmt_cost (target_cost_data,
3965 si->count * peel_iters_epilogue, si->kind,
3966 si->stmt_info, si->vectype, si->misalign,
3967 vect_epilogue);
3968 }
3969
3970 /* Add possible cond_branch_taken/cond_branch_not_taken cost. */
3971
3972 if (prologue_need_br_taken_cost)
3973 (void) add_stmt_cost (target_cost_data, 1, cond_branch_taken,
3974 NULLnullptr, NULL_TREE(tree) nullptr, 0, vect_prologue);
3975
3976 if (prologue_need_br_not_taken_cost)
3977 (void) add_stmt_cost (target_cost_data, 1,
3978 cond_branch_not_taken, NULLnullptr, NULL_TREE(tree) nullptr, 0,
3979 vect_prologue);
3980
3981 if (epilogue_need_br_taken_cost)
3982 (void) add_stmt_cost (target_cost_data, 1, cond_branch_taken,
3983 NULLnullptr, NULL_TREE(tree) nullptr, 0, vect_epilogue);
3984
3985 if (epilogue_need_br_not_taken_cost)
3986 (void) add_stmt_cost (target_cost_data, 1,
3987 cond_branch_not_taken, NULLnullptr, NULL_TREE(tree) nullptr, 0,
3988 vect_epilogue);
3989
3990 /* Take care of special costs for rgroup controls of partial vectors. */
3991 if (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
3992 {
3993 /* Calculate how many masks we need to generate. */
3994 unsigned int num_masks = 0;
3995 rgroup_controls *rgm;
3996 unsigned int num_vectors_m1;
3997 FOR_EACH_VEC_ELT (LOOP_VINFO_MASKS (loop_vinfo), num_vectors_m1, rgm)
3998 if (rgm->type)
3999 num_masks += num_vectors_m1 + 1;
4000 gcc_assert (num_masks > 0);
4001
4002 /* In the worst case, we need to generate each mask in the prologue
4003 and in the loop body. One of the loop body mask instructions
4004 replaces the comparison in the scalar loop, and since we don't
4005 count the scalar comparison against the scalar body, we shouldn't
4006 count that vector instruction against the vector body either.
4007
4008 Sometimes we can use unpacks instead of generating prologue
4009 masks and sometimes the prologue mask will fold to a constant,
4010 so the actual prologue cost might be smaller. However, it's
4011 simpler and safer to use the worst-case cost; if this ends up
4012 being the tie-breaker between vectorizing or not, then it's
4013 probably better not to vectorize. */
4014 (void) add_stmt_cost (target_cost_data, num_masks,
4015 vector_stmt, NULLnullptr, NULL_TREE(tree) nullptr, 0, vect_prologue);
4016 (void) add_stmt_cost (target_cost_data, num_masks - 1,
4017 vector_stmt, NULLnullptr, NULL_TREE(tree) nullptr, 0, vect_body);
4018 }
4019 else if (LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo))
4020 {
4021 /* Referring to the functions vect_set_loop_condition_partial_vectors
4022 and vect_set_loop_controls_directly, we need to generate each
4023 length in the prologue and in the loop body if required. Although
4024 there are some possible optimizations, we consider the worst case
4025 here. */
4026
4027 bool niters_known_p = LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo);
4028 bool need_iterate_p
4029 = (!LOOP_VINFO_EPILOGUE_P (loop_vinfo)((loop_vinfo)->orig_loop_info != nullptr)
4030 && !vect_known_niters_smaller_than_vf (loop_vinfo));
4031
4032 /* Calculate how many statements to be added. */
4033 unsigned int prologue_stmts = 0;
4034 unsigned int body_stmts = 0;
4035
4036 rgroup_controls *rgc;
4037 unsigned int num_vectors_m1;
4038 FOR_EACH_VEC_ELT (LOOP_VINFO_LENS (loop_vinfo), num_vectors_m1, rgc)
4039 if (rgc->type)
4040 {
4041 /* May need one SHIFT for nitems_total computation. */
4042 unsigned nitems = rgc->max_nscalars_per_iter * rgc->factor;
4043 if (nitems != 1 && !niters_known_p)
4044 prologue_stmts += 1;
4045
4046 /* May need one MAX and one MINUS for wrap around. */
4047 if (vect_rgroup_iv_might_wrap_p (loop_vinfo, rgc))
4048 prologue_stmts += 2;
4049
4050 /* Need one MAX and one MINUS for each batch limit except for
4051 the first one. */
4052 prologue_stmts += num_vectors_m1 * 2;
4053
4054 unsigned int num_vectors = num_vectors_m1 + 1;
4055
4056 /* Need to set up lengths in prologue, only one MIN required
4057 for each since start index is zero. */
4058 prologue_stmts += num_vectors;
4059
4060 /* Each may need two MINs and one MINUS to update lengths in body
4061 for next iteration. */
4062 if (need_iterate_p)
4063 body_stmts += 3 * num_vectors;
4064 }
4065
4066 (void) add_stmt_cost (target_cost_data, prologue_stmts,
4067 scalar_stmt, NULLnullptr, NULL_TREE(tree) nullptr, 0, vect_prologue);
4068 (void) add_stmt_cost (target_cost_data, body_stmts,
4069 scalar_stmt, NULLnullptr, NULL_TREE(tree) nullptr, 0, vect_body);
4070 }
4071
4072 /* FORNOW: The scalar outside cost is incremented in one of the
4073 following ways:
4074
4075 1. The vectorizer checks for alignment and aliasing and generates
4076 a condition that allows dynamic vectorization. A cost model
4077 check is ANDED with the versioning condition. Hence scalar code
4078 path now has the added cost of the versioning check.
4079
4080 if (cost > th & versioning_check)
4081 jmp to vector code
4082
4083 Hence run-time scalar is incremented by not-taken branch cost.
4084
4085 2. The vectorizer then checks if a prologue is required. If the
4086 cost model check was not done before during versioning, it has to
4087 be done before the prologue check.
4088
4089 if (cost <= th)
4090 prologue = scalar_iters
4091 if (prologue == 0)
4092 jmp to vector code
4093 else
4094 execute prologue
4095 if (prologue == num_iters)
4096 go to exit
4097
4098 Hence the run-time scalar cost is incremented by a taken branch,
4099 plus a not-taken branch, plus a taken branch cost.
4100
4101 3. The vectorizer then checks if an epilogue is required. If the
4102 cost model check was not done before during prologue check, it
4103 has to be done with the epilogue check.
4104
4105 if (prologue == 0)
4106 jmp to vector code
4107 else
4108 execute prologue
4109 if (prologue == num_iters)
4110 go to exit
4111 vector code:
4112 if ((cost <= th) | (scalar_iters-prologue-epilogue == 0))
4113 jmp to epilogue
4114
4115 Hence the run-time scalar cost should be incremented by 2 taken
4116 branches.
4117
4118 TODO: The back end may reorder the BBS's differently and reverse
4119 conditions/branch directions. Change the estimates below to
4120 something more reasonable. */
4121
4122 /* If the number of iterations is known and we do not do versioning, we can
4123 decide whether to vectorize at compile time. Hence the scalar version
4124 does not carry cost model guard costs. */
4125 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
4126 || LOOP_REQUIRES_VERSIONING (loop_vinfo))
4127 {
4128 /* Cost model check occurs at versioning. */
4129 if (LOOP_REQUIRES_VERSIONING (loop_vinfo))
4130 scalar_outside_cost += vect_get_stmt_cost (cond_branch_not_taken);
4131 else
4132 {
4133 /* Cost model check occurs at prologue generation. */
4134 if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)(loop_vinfo)->peeling_for_alignment < 0)
4135 scalar_outside_cost += 2 * vect_get_stmt_cost (cond_branch_taken)
4136 + vect_get_stmt_cost (cond_branch_not_taken);
4137 /* Cost model check occurs at epilogue generation. */
4138 else
4139 scalar_outside_cost += 2 * vect_get_stmt_cost (cond_branch_taken);
4140 }
4141 }
4142
4143 /* Complete the target-specific cost calculations. */
4144 finish_cost (loop_vinfo->vector_costs, loop_vinfo->scalar_costs,
4145 &vec_prologue_cost, &vec_inside_cost, &vec_epilogue_cost);
4146
4147 vec_outside_cost = (int)(vec_prologue_cost + vec_epilogue_cost);
4148
4149 if (dump_enabled_p ())
4150 {
4151 dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
4152 dump_printf (MSG_NOTE, " Vector inside of loop cost: %d\n",
4153 vec_inside_cost);
4154 dump_printf (MSG_NOTE, " Vector prologue cost: %d\n",
4155 vec_prologue_cost);
4156 dump_printf (MSG_NOTE, " Vector epilogue cost: %d\n",
4157 vec_epilogue_cost);
4158 dump_printf (MSG_NOTE, " Scalar iteration cost: %d\n",
4159 scalar_single_iter_cost);
4160 dump_printf (MSG_NOTE, " Scalar outside cost: %d\n",
4161 scalar_outside_cost);
4162 dump_printf (MSG_NOTE, " Vector outside cost: %d\n",
4163 vec_outside_cost);
4164 dump_printf (MSG_NOTE, " prologue iterations: %d\n",
4165 peel_iters_prologue);
4166 dump_printf (MSG_NOTE, " epilogue iterations: %d\n",
4167 peel_iters_epilogue);
4168 }
4169
4170 /* Calculate number of iterations required to make the vector version
4171 profitable, relative to the loop bodies only. The following condition
4172 must hold true:
4173 SIC * niters + SOC > VIC * ((niters - NPEEL) / VF) + VOC
4174 where
4175 SIC = scalar iteration cost, VIC = vector iteration cost,
4176 VOC = vector outside cost, VF = vectorization factor,
4177 NPEEL = prologue iterations + epilogue iterations,
4178 SOC = scalar outside cost for run time cost model check. */
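/* Editorial illustration (not part of the analyzed GCC source), with
   assumed example costs SIC = 4, VIC = 12, VOC = 30, SOC = 6, VF = 8 and
   NPEEL = 4: treating the division as exact, the condition
   4 * niters + 6 > 12 * ((niters - 4) / 8) + 30 reduces to
   2.5 * niters > 18, i.e. vectorization pays off from roughly 8 scalar
   iterations onwards; the code below derives the exact integer threshold.  */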
4179
4180 int saving_per_viter = (scalar_single_iter_cost * assumed_vf
4181 - vec_inside_cost);
4182 if (saving_per_viter <= 0)
4183 {
4184 if (LOOP_VINFO_LOOP (loop_vinfo)(loop_vinfo)->loop->force_vectorize)
4185 warning_at (vect_location.get_location_t (), OPT_Wopenmp_simd,
4186 "vectorization did not happen for a simd loop");
4187
4188 if (dump_enabled_p ())
4189 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4190 "cost model: the vector iteration cost = %d "
4191 "divided by the scalar iteration cost = %d "
4192 "is greater or equal to the vectorization factor = %d"
4193 ".\n",
4194 vec_inside_cost, scalar_single_iter_cost, assumed_vf);
4195 *ret_min_profitable_niters = -1;
4196 *ret_min_profitable_estimate = -1;
4197 return;
4198 }
4199
4200 /* ??? The "if" arm is written to handle all cases; see below for what
4201 we would do for !LOOP_VINFO_USING_PARTIAL_VECTORS_P. */
4202 if (LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)(loop_vinfo)->using_partial_vectors_p)
4203 {
4204 /* Rewriting the condition above in terms of the number of
4205 vector iterations (vniters) rather than the number of
4206 scalar iterations (niters) gives:
4207
4208 SIC * (vniters * VF + NPEEL) + SOC > VIC * vniters + VOC
4209
4210 <==> vniters * (SIC * VF - VIC) > VOC - SIC * NPEEL - SOC
4211
4212 For integer N, X and Y when X > 0:
4213
4214 N * X > Y <==> N >= (Y /[floor] X) + 1. */
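/* Editorial illustration (not part of the analyzed GCC source): with an
   assumed outside_overhead Y = 35 and saving_per_viter X = 20, the rule
   gives N >= 35 / 20 + 1 = 2; indeed 2 * 20 = 40 > 35 while
   1 * 20 = 20 is not, matching the min_vec_niters computation below.  */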
4215 int outside_overhead = (vec_outside_cost
4216 - scalar_single_iter_cost * peel_iters_prologue
4217 - scalar_single_iter_cost * peel_iters_epilogue
4218 - scalar_outside_cost);
4219 /* We're only interested in cases that require at least one
4220 vector iteration. */
4221 int min_vec_niters = 1;
4222 if (outside_overhead > 0)
4223 min_vec_niters = outside_overhead / saving_per_viter + 1;
4224
4225 if (dump_enabled_p ())
4226 dump_printf (MSG_NOTE, " Minimum number of vector iterations: %d\n",
4227 min_vec_niters);
4228
4229 if (LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)(loop_vinfo)->using_partial_vectors_p)
4230 {
4231 /* Now that we know the minimum number of vector iterations,
4232 find the minimum niters for which the scalar cost is larger:
4233
4234 SIC * niters > VIC * vniters + VOC - SOC
4235
4236 We know that the minimum niters is no more than
4237 vniters * VF + NPEEL, but it might be (and often is) less
4238 than that if a partial vector iteration is cheaper than the
4239 equivalent scalar code. */
4240 int threshold = (vec_inside_cost * min_vec_niters
4241 + vec_outside_cost
4242 - scalar_outside_cost);
4243 if (threshold <= 0)
4244 min_profitable_iters = 1;
4245 else
4246 min_profitable_iters = threshold / scalar_single_iter_cost + 1;
4247 }
4248 else
4249 /* Convert the number of vector iterations into a number of
4250 scalar iterations. */
4251 min_profitable_iters = (min_vec_niters * assumed_vf
4252 + peel_iters_prologue
4253 + peel_iters_epilogue);
4254 }
4255 else
4256 {
4257 min_profitable_iters = ((vec_outside_cost - scalar_outside_cost)
4258 * assumed_vf
4259 - vec_inside_cost * peel_iters_prologue
4260 - vec_inside_cost * peel_iters_epilogue);
4261 if (min_profitable_iters <= 0)
4262 min_profitable_iters = 0;
4263 else
4264 {
4265 min_profitable_iters /= saving_per_viter;
4266
4267 if ((scalar_single_iter_cost * assumed_vf * min_profitable_iters)
4268 <= (((int) vec_inside_cost * min_profitable_iters)
4269 + (((int) vec_outside_cost - scalar_outside_cost)
4270 * assumed_vf)))
4271 min_profitable_iters++;
4272 }
4273 }
4274
4275 if (dump_enabled_p ())
4276 dump_printf (MSG_NOTE,
4277 " Calculated minimum iters for profitability: %d\n",
4278 min_profitable_iters);
4279
4280 if (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)(loop_vinfo)->using_partial_vectors_p
4281 && min_profitable_iters < (assumed_vf + peel_iters_prologue))
4282 /* We want the vectorized loop to execute at least once. */
4283 min_profitable_iters = assumed_vf + peel_iters_prologue;
4284 else if (min_profitable_iters < peel_iters_prologue)
4285 /* For LOOP_VINFO_USING_PARTIAL_VECTORS_P, we need to ensure the
4286 vectorized loop executes at least once. */
4287 min_profitable_iters = peel_iters_prologue;
4288
4289 if (dump_enabled_p ())
4290 dump_printf_loc (MSG_NOTE, vect_location,
4291 " Runtime profitability threshold = %d\n",
4292 min_profitable_iters);
4293
4294 *ret_min_profitable_niters = min_profitable_iters;
4295
4296 /* Calculate number of iterations required to make the vector version
4297 profitable, relative to the loop bodies only.
4298
4299 The non-vectorized variant costs SIC * niters and must win over the vector
4300 variant for the expected loop trip count. The following condition must hold true:
4301 SIC * niters > VIC * ((niters - NPEEL) / VF) + VOC + SOC */
4302
4303 if (vec_outside_cost <= 0)
4304 min_profitable_estimate = 0;
4305 /* ??? This "else if" arm is written to handle all cases; see below for
4306 what we would do for !LOOP_VINFO_USING_PARTIAL_VECTORS_P. */
4307 else if (LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)(loop_vinfo)->using_partial_vectors_p)
4308 {
4309 /* This is a repeat of the code above, but with + SOC rather
4310 than - SOC. */
4311 int outside_overhead = (vec_outside_cost
4312 - scalar_single_iter_cost * peel_iters_prologue
4313 - scalar_single_iter_cost * peel_iters_epilogue
4314 + scalar_outside_cost);
4315 int min_vec_niters = 1;
4316 if (outside_overhead > 0)
4317 min_vec_niters = outside_overhead / saving_per_viter + 1;
4318
4319 if (LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)(loop_vinfo)->using_partial_vectors_p)
4320 {
4321 int threshold = (vec_inside_cost * min_vec_niters
4322 + vec_outside_cost
4323 + scalar_outside_cost);
4324 min_profitable_estimate = threshold / scalar_single_iter_cost + 1;
4325 }
4326 else
4327 min_profitable_estimate = (min_vec_niters * assumed_vf
4328 + peel_iters_prologue
4329 + peel_iters_epilogue);
4330 }
4331 else
4332 {
4333 min_profitable_estimate = ((vec_outside_cost + scalar_outside_cost)
4334 * assumed_vf
4335 - vec_inside_cost * peel_iters_prologue
4336 - vec_inside_cost * peel_iters_epilogue)
4337 / ((scalar_single_iter_cost * assumed_vf)
4338 - vec_inside_cost);
4339 }
4340 min_profitable_estimate = MAX (min_profitable_estimate, min_profitable_iters);
4341 if (dump_enabled_p ())
4342 dump_printf_loc (MSG_NOTE, vect_location,
4343 " Static estimate profitability threshold = %d\n",
4344 min_profitable_estimate);
4345
4346 *ret_min_profitable_estimate = min_profitable_estimate;
4347}
4348
4349/* Writes into SEL a mask for a vec_perm, equivalent to a vec_shr by OFFSET
4350 vector elements (not bits) for a vector with NELT elements. */
4351static void
4352calc_vec_perm_mask_for_shift (unsigned int offset, unsigned int nelt,
4353 vec_perm_builder *sel)
4354{
4355 /* The encoding is a single stepped pattern. Any wrap-around is handled
4356 by vec_perm_indices. */
4357 sel->new_vector (nelt, 1, 3);
4358 for (unsigned int i = 0; i < 3; i++)
4359 sel->quick_push (i + offset);
4360}
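/* Editorial sketch (not part of the analyzed GCC source): the effect of the
   selector built above, spelled out with plain arrays.  For OFFSET = 2 and
   NELT = 8 the stepped pattern {2, 3, 4} extends to {2, 3, ..., 9}; indices
   at or beyond NELT select lanes of the second vec_perm operand, which the
   shift users pass as zeros, so the result is the input shifted down by two
   elements.  */
static void
vec_shr_by_perm_example ()
{
  const unsigned int nelt = 8, offset = 2;
  int in[nelt] = {10, 11, 12, 13, 14, 15, 16, 17};
  int zeros[nelt] = {0};
  int out[nelt];
  for (unsigned int i = 0; i < nelt; i++)
    {
      unsigned int sel = i + offset;	/* The mask element for lane i.  */
      out[i] = sel < nelt ? in[sel] : zeros[sel - nelt];
    }
  /* out is now {12, 13, 14, 15, 16, 17, 0, 0}.  */
  (void) out;
}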
4361
4362/* Checks whether the target supports whole-vector shifts for vectors of mode
4363 MODE. This is the case if _either_ the platform handles vec_shr_optab, _or_
4364 it supports vec_perm_const with masks for all necessary shift amounts. */
4365static bool
4366have_whole_vector_shift (machine_mode mode)
4367{
4368 if (optab_handler (vec_shr_optab, mode) != CODE_FOR_nothing)
4369 return true;
4370
4371 /* Variable-length vectors should be handled via the optab. */
4372 unsigned int nelt;
4373 if (!GET_MODE_NUNITS (mode).is_constant (&nelt))
4374 return false;
4375
4376 vec_perm_builder sel;
4377 vec_perm_indices indices;
4378 for (unsigned int i = nelt / 2; i >= 1; i /= 2)
4379 {
4380 calc_vec_perm_mask_for_shift (i, nelt, &sel);
4381 indices.new_vector (sel, 2, nelt);
4382 if (!can_vec_perm_const_p (mode, indices, false))
4383 return false;
4384 }
4385 return true;
4386}
4387
4388/* TODO: Close dependency between vect_model_*_cost and vectorizable_*
4389 functions. Design better to avoid maintenance issues. */
4390
4391/* Function vect_model_reduction_cost.
4392
4393 Models cost for a reduction operation, including the vector ops
4394 generated within the strip-mine loop in some cases, the initial
4395 definition before the loop, and the epilogue code that must be generated. */
4396
4397static void
4398vect_model_reduction_cost (loop_vec_info loop_vinfo,
4399 stmt_vec_info stmt_info, internal_fn reduc_fn,
4400 vect_reduction_type reduction_type,
4401 int ncopies, stmt_vector_for_cost *cost_vec)
4402{
4403 int prologue_cost = 0, epilogue_cost = 0, inside_cost = 0;
4404 enum tree_code code;
4405 optab optab;
4406 tree vectype;
4407 machine_mode mode;
4408 class loop *loop = NULLnullptr;
4409
4410 if (loop_vinfo)
4411 loop = LOOP_VINFO_LOOP (loop_vinfo)(loop_vinfo)->loop;
4412
4413 /* Condition reductions generate two reductions in the loop. */
4414 if (reduction_type == COND_REDUCTION)
4415 ncopies *= 2;
4416
4417 vectype = STMT_VINFO_VECTYPE (stmt_info)(stmt_info)->vectype;
4418 mode = TYPE_MODE (vectype);
4419 stmt_vec_info orig_stmt_info = vect_orig_stmt (stmt_info);
4420
4421 code = gimple_assign_rhs_code (orig_stmt_info->stmt);
4422
4423 if (reduction_type == EXTRACT_LAST_REDUCTION)
4424 /* No extra instructions are needed in the prologue. The loop body
4425 operations are costed in vectorizable_condition. */
4426 inside_cost = 0;
4427 else if (reduction_type == FOLD_LEFT_REDUCTION)
4428 {
4429 /* No extra instructions needed in the prologue. */
4430 prologue_cost = 0;
4431
4432 if (reduc_fn != IFN_LAST)
4433 /* Count one reduction-like operation per vector. */
4434 inside_cost = record_stmt_cost (cost_vec, ncopies, vec_to_scalar,
4435 stmt_info, 0, vect_body);
4436 else
4437 {
4438 /* Use NELEMENTS extracts and NELEMENTS scalar ops. */
4439 unsigned int nelements = ncopies * vect_nunits_for_cost (vectype);
4440 inside_cost = record_stmt_cost (cost_vec, nelements,
4441 vec_to_scalar, stmt_info, 0,
4442 vect_body);
4443 inside_cost += record_stmt_cost (cost_vec, nelements,
4444 scalar_stmt, stmt_info, 0,
4445 vect_body);
4446 }
4447 }
4448 else
4449 {
4450 /* Add in cost for initial definition.
4451 For cond reduction we have four vectors: initial index, step,
4452 initial result of the data reduction, initial value of the index
4453 reduction. */
4454 int prologue_stmts = reduction_type == COND_REDUCTION ? 4 : 1;
4455 prologue_cost += record_stmt_cost (cost_vec, prologue_stmts,
4456 scalar_to_vec, stmt_info, 0,
4457 vect_prologue);
4458 }
4459
4460 /* Determine cost of epilogue code.
4461
4462 We have a reduction operator that will reduce the vector in one statement.
4463 Also requires scalar extract. */
4464
4465 if (!loop || !nested_in_vect_loop_p (loop, orig_stmt_info))
4466 {
4467 if (reduc_fn != IFN_LAST)
4468 {
4469 if (reduction_type == COND_REDUCTION)
4470 {
4471 /* An EQ stmt and a COND_EXPR stmt. */
4472 epilogue_cost += record_stmt_cost (cost_vec, 2,
4473 vector_stmt, stmt_info, 0,
4474 vect_epilogue);
4475 /* Reduction of the max index and a reduction of the found
4476 values. */
4477 epilogue_cost += record_stmt_cost (cost_vec, 2,
4478 vec_to_scalar, stmt_info, 0,
4479 vect_epilogue);
4480 /* A broadcast of the max value. */
4481 epilogue_cost += record_stmt_cost (cost_vec, 1,
4482 scalar_to_vec, stmt_info, 0,
4483 vect_epilogue);
4484 }
4485 else
4486 {
4487 epilogue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
4488 stmt_info, 0, vect_epilogue);
4489 epilogue_cost += record_stmt_cost (cost_vec, 1,
4490 vec_to_scalar, stmt_info, 0,
4491 vect_epilogue);
4492 }
4493 }
4494 else if (reduction_type == COND_REDUCTION)
4495 {
4496 unsigned estimated_nunits = vect_nunits_for_cost (vectype);
4497 /* Extraction of scalar elements. */
4498 epilogue_cost += record_stmt_cost (cost_vec,
4499 2 * estimated_nunits,
4500 vec_to_scalar, stmt_info, 0,
4501 vect_epilogue);
4502 /* Scalar max reductions via COND_EXPR / MAX_EXPR. */
4503 epilogue_cost += record_stmt_cost (cost_vec,
4504 2 * estimated_nunits - 3,
4505 scalar_stmt, stmt_info, 0,
4506 vect_epilogue);
4507 }
4508 else if (reduction_type == EXTRACT_LAST_REDUCTION
4509 || reduction_type == FOLD_LEFT_REDUCTION)
4510 /* No extra instructions needed in the epilogue. */
4511 ;
4512 else
4513 {
4514 int vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype));
4515 tree bitsize =
4516 TYPE_SIZE (TREE_TYPE (gimple_assign_lhs (orig_stmt_info->stmt)));
4517 int element_bitsize = tree_to_uhwi (bitsize);
4518 int nelements = vec_size_in_bits / element_bitsize;
4519
4520 if (code == COND_EXPR)
4521 code = MAX_EXPR;
4522
4523 optab = optab_for_tree_code (code, vectype, optab_default);
4524
4525 /* We have a whole vector shift available. */
4526 if (optab != unknown_optab
4527 && VECTOR_MODE_P (mode)
4528 && optab_handler (optab, mode) != CODE_FOR_nothing
4529 && have_whole_vector_shift (mode))
4530 {
4531 /* Final reduction via vector shifts and the reduction operator.
4532 Also requires scalar extract. */
4533 epilogue_cost += record_stmt_cost (cost_vec,
4534 exact_log2 (nelements) * 2,
4535 vector_stmt, stmt_info, 0,
4536 vect_epilogue);
4537 epilogue_cost += record_stmt_cost (cost_vec, 1,
4538 vec_to_scalar, stmt_info, 0,
4539 vect_epilogue);
4540 }
4541 else
4542 /* Use extracts and reduction op for final reduction. For N
4543 elements, we have N extracts and N-1 reduction ops. */
4544 epilogue_cost += record_stmt_cost (cost_vec,
4545 nelements + nelements - 1,
4546 vector_stmt, stmt_info, 0,
4547 vect_epilogue);
4548 }
4549 }
4550
4551 if (dump_enabled_p ())
4552 dump_printf (MSG_NOTE,
4553 "vect_model_reduction_cost: inside_cost = %d, "
4554 "prologue_cost = %d, epilogue_cost = %d .\n", inside_cost,
4555 prologue_cost, epilogue_cost);
4556}
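/* Editorial illustration (not part of the analyzed GCC source): for a plain
   sum reduction over an 8-element vector with a whole-vector shift
   available, the epilogue above is costed as exact_log2 (8) * 2 = 6 vector
   statements plus one vec_to_scalar extract; without the shift it falls
   back to 8 extracts plus 7 reduction operations, costed as 15 statements.  */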
4557
4558/* SEQ is a sequence of instructions that initialize the reduction
4559 described by REDUC_INFO. Emit them in the appropriate place. */
4560
4561static void
4562vect_emit_reduction_init_stmts (loop_vec_info loop_vinfo,
4563 stmt_vec_info reduc_info, gimple *seq)
4564{
4565 if (reduc_info->reused_accumulator)
4566 {
4567 /* When reusing an accumulator from the main loop, we only need
4568 initialization instructions if the main loop can be skipped.
4569 In that case, emit the initialization instructions at the end
4570 of the guard block that does the skip. */
4571 edge skip_edge = loop_vinfo->skip_main_loop_edge;
4572 gcc_assert (skip_edge);
4573 gimple_stmt_iterator gsi = gsi_last_bb (skip_edge->src);
4574 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
4575 }
4576 else
4577 {
4578 /* The normal case: emit the initialization instructions on the
4579 preheader edge. */
4580 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo)(loop_vinfo)->loop;
4581 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), seq);
4582 }
4583}
4584
4585/* Function get_initial_def_for_reduction
4586
4587 Input:
4588 REDUC_INFO - the info_for_reduction
4589 INIT_VAL - the initial value of the reduction variable
4590 NEUTRAL_OP - a value that has no effect on the reduction, as per
4591 neutral_op_for_reduction
4592
4593 Output:
4594 Return a vector variable, initialized according to the operation that
4595 STMT_VINFO performs. This vector will be used as the initial value
4596 of the vector of partial results.
4597
4598 The value we need is a vector in which element 0 has value INIT_VAL
4599 and every other element has value NEUTRAL_OP. */
4600
4601static tree
4602get_initial_def_for_reduction (loop_vec_info loop_vinfo,
4603 stmt_vec_info reduc_info,
4604 tree init_val, tree neutral_op)
4605{
4606 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo)(loop_vinfo)->loop;
4607 tree scalar_type = TREE_TYPE (init_val);
4608 tree vectype = get_vectype_for_scalar_type (loop_vinfo, scalar_type);
4609 tree init_def;
4610 gimple_seq stmts = NULLnullptr;
4611
4612 gcc_assert (vectype);
4613
4614 gcc_assert (POINTER_TYPE_P (scalar_type) || INTEGRAL_TYPE_P (scalar_type)
4615 || SCALAR_FLOAT_TYPE_P (scalar_type));
4616
4617 gcc_assert (nested_in_vect_loop_p (loop, reduc_info)
4618 || loop == (gimple_bb (reduc_info->stmt))->loop_father);
4619
4620 if (operand_equal_p (init_val, neutral_op))
4621 {
4622 /* If both elements are equal then the vector described above is
4623 just a splat. */
4624 neutral_op = gimple_convert (&stmts, TREE_TYPE (vectype), neutral_op);
4625 init_def = gimple_build_vector_from_val (&stmts, vectype, neutral_op);
4626 }
4627 else
4628 {
4629 neutral_op = gimple_convert (&stmts, TREE_TYPE (vectype), neutral_op);
4630 init_val = gimple_convert (&stmts, TREE_TYPE (vectype), init_val);
4631 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant ())
4632 {
4633 /* Construct a splat of NEUTRAL_OP and insert INIT_VAL into
4634 element 0. */
4635 init_def = gimple_build_vector_from_val (&stmts, vectype,
4636 neutral_op);
4637 init_def = gimple_build (&stmts, CFN_VEC_SHL_INSERT,
4638 vectype, init_def, init_val);
4639 }
4640 else
4641 {
4642 /* Build {INIT_VAL, NEUTRAL_OP, NEUTRAL_OP, ...}. */
4643 tree_vector_builder elts (vectype, 1, 2);
4644 elts.quick_push (init_val);
4645 elts.quick_push (neutral_op);
4646 init_def = gimple_build_vector (&stmts, &elts);
4647 }
4648 }
4649
4650 if (stmts)
4651 vect_emit_reduction_init_stmts (loop_vinfo, reduc_info, stmts);
4652 return init_def;
4653}
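/* Editorial illustration (not part of the analyzed GCC source): for a sum
   reduction with INIT_VAL = 5 and NEUTRAL_OP = 0 on a 4-element vector this
   builds {5, 0, 0, 0}; for a MIN/MAX reduction, where the neutral value
   equals the initial value, it degenerates to the splat {5, 5, 5, 5}.  */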
4654
4655/* Get at the initial defs for the reduction PHIs for REDUC_INFO,
4656 which performs a reduction involving GROUP_SIZE scalar statements.
4657 NUMBER_OF_VECTORS is the number of vector defs to create. If NEUTRAL_OP
4658 is nonnull, introducing extra elements of that value will not change the
4659 result. */
4660
4661static void
4662get_initial_defs_for_reduction (loop_vec_info loop_vinfo,
4663 stmt_vec_info reduc_info,
4664 vec<tree> *vec_oprnds,
4665 unsigned int number_of_vectors,
4666 unsigned int group_size, tree neutral_op)
4667{
4668 vec<tree> &initial_values = reduc_info->reduc_initial_values;
4669 unsigned HOST_WIDE_INT nunits;
4670 unsigned j, number_of_places_left_in_vector;
4671 tree vector_type = STMT_VINFO_VECTYPE (reduc_info)(reduc_info)->vectype;
4672 unsigned int i;
4673
4674 gcc_assert (group_size == initial_values.length () || neutral_op);
4675
4676 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
4677 created vectors. It is greater than 1 if unrolling is performed.
4678
4679 For example, we have two scalar operands, s1 and s2 (e.g., group of
4680 strided accesses of size two), while NUNITS is four (i.e., four scalars
4681 of this type can be packed in a vector). The output vector will contain
4682 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
4683 will be 2).
4684
4685 If REDUC_GROUP_SIZE > NUNITS, the scalars will be split into several
4686 vectors containing the operands.
4687
4688 For example, NUNITS is four as before, and the group size is 8
4689 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
4690 {s5, s6, s7, s8}. */
4691
4692 if (!TYPE_VECTOR_SUBPARTS (vector_type).is_constant (&nunits))
4693 nunits = group_size;
4694
4695 number_of_places_left_in_vector = nunits;
4696 bool constant_p = true;
4697 tree_vector_builder elts (vector_type, nunits, 1);
4698 elts.quick_grow (nunits);
4699 gimple_seq ctor_seq = NULLnullptr;
4700 for (j = 0; j < nunits * number_of_vectors; ++j)
4701 {
4702 tree op;
4703 i = j % group_size;
4704
4705 /* Get the def before the loop. In reduction chain we have only
4706 one initial value. Else we have as many as PHIs in the group. */
4707 if (i >= initial_values.length () || (j > i && neutral_op))
4708 op = neutral_op;
4709 else
4710 op = initial_values[i];
4711
4712 /* Create 'vect_ = {op0,op1,...,opn}'. */
4713 number_of_places_left_in_vector--;
4714 elts[nunits - number_of_places_left_in_vector - 1] = op;
4715 if (!CONSTANT_CLASS_P (op))
4716 constant_p = false;
4717
4718 if (number_of_places_left_in_vector == 0)
4719 {
4720 tree init;
4721 if (constant_p && !neutral_op
4722 ? multiple_p (TYPE_VECTOR_SUBPARTS (vector_type), nunits)
4723 : known_eq (TYPE_VECTOR_SUBPARTS (vector_type), nunits)(!maybe_ne (TYPE_VECTOR_SUBPARTS (vector_type), nunits)))
4724 /* Build the vector directly from ELTS. */
4725 init = gimple_build_vector (&ctor_seq, &elts);
4726 else if (neutral_op)
4727 {
4728 /* Build a vector of the neutral value and shift the
4729 other elements into place. */
4730 init = gimple_build_vector_from_val (&ctor_seq, vector_type,
4731 neutral_op);
4732 int k = nunits;
4733 while (k > 0 && elts[k - 1] == neutral_op)
4734 k -= 1;
4735 while (k > 0)
4736 {
4737 k -= 1;
4738 init = gimple_build (&ctor_seq, CFN_VEC_SHL_INSERT,
4739 vector_type, init, elts[k]);
4740 }
4741 }
4742 else
4743 {
4744 /* First time round, duplicate ELTS to fill the
4745 required number of vectors. */
4746 duplicate_and_interleave (loop_vinfo, &ctor_seq, vector_type,
4747 elts, number_of_vectors, *vec_oprnds);
4748 break;
4749 }
4750 vec_oprnds->quick_push (init);
4751
4752 number_of_places_left_in_vector = nunits;
4753 elts.new_vector (vector_type, nunits, 1);
4754 elts.quick_grow (nunits);
4755 constant_p = true;
4756 }
4757 }
4758 if (ctor_seq != NULLnullptr)
4759 vect_emit_reduction_init_stmts (loop_vinfo, reduc_info, ctor_seq);
4760}
4761
4762/* For a statement STMT_INFO taking part in a reduction operation return
4763 the stmt_vec_info the meta information is stored on. */
4764
4765stmt_vec_info
4766info_for_reduction (vec_info *vinfo, stmt_vec_info stmt_info)
4767{
4768 stmt_info = vect_orig_stmt (stmt_info);
4769 gcc_assert (STMT_VINFO_REDUC_DEF (stmt_info));
4770 if (!is_a <gphi *> (stmt_info->stmt)
4771 || !VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
4772 stmt_info = STMT_VINFO_REDUC_DEF (stmt_info)(stmt_info)->reduc_def;
4773 gphi *phi = as_a <gphi *> (stmt_info->stmt);
4774 if (STMT_VINFO_DEF_TYPE (stmt_info)(stmt_info)->def_type == vect_double_reduction_def)
4775 {
4776 if (gimple_phi_num_args (phi) == 1)
4777 stmt_info = STMT_VINFO_REDUC_DEF (stmt_info)(stmt_info)->reduc_def;
4778 }
4779 else if (STMT_VINFO_DEF_TYPE (stmt_info)(stmt_info)->def_type == vect_nested_cycle)
4780 {
4781 stmt_vec_info info = vinfo->lookup_def (vect_phi_initial_value (phi));
4782 if (info && STMT_VINFO_DEF_TYPE (info)(info)->def_type == vect_double_reduction_def)
4783 stmt_info = info;
4784 }
4785 return stmt_info;
4786}
4787
4788/* See if LOOP_VINFO is an epilogue loop whose main loop had a reduction that
4789 REDUC_INFO can build on. Adjust REDUC_INFO and return true if so, otherwise
4790 return false. */
4791
4792static bool
4793vect_find_reusable_accumulator (loop_vec_info loop_vinfo,
4794 stmt_vec_info reduc_info)
4795{
4796 loop_vec_info main_loop_vinfo = LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo)(loop_vinfo)->orig_loop_info;
4797 if (!main_loop_vinfo)
4798 return false;
4799
4800 if (STMT_VINFO_REDUC_TYPE (reduc_info)(reduc_info)->reduc_type != TREE_CODE_REDUCTION)
4801 return false;
4802
4803 unsigned int num_phis = reduc_info->reduc_initial_values.length ();
4804 auto_vec<tree, 16> main_loop_results (num_phis);
4805 auto_vec<tree, 16> initial_values (num_phis);
4806 if (edge main_loop_edge = loop_vinfo->main_loop_edge)
4807 {
4808 /* The epilogue loop can be entered either from the main loop or
4809 from an earlier guard block. */
4810 edge skip_edge = loop_vinfo->skip_main_loop_edge;
4811 for (tree incoming_value : reduc_info->reduc_initial_values)
4812 {
4813 /* Look for:
4814
4815 INCOMING_VALUE = phi<MAIN_LOOP_RESULT(main loop),
4816 INITIAL_VALUE(guard block)>. */
4817 gcc_assert (TREE_CODE (incoming_value) == SSA_NAME);
4818
4819 gphi *phi = as_a <gphi *> (SSA_NAME_DEF_STMT (incoming_value));
4820 gcc_assert (gimple_bb (phi) == main_loop_edge->dest);
4821
4822 tree from_main_loop = PHI_ARG_DEF_FROM_EDGE (phi, main_loop_edge)gimple_phi_arg_def (((phi)), ((main_loop_edge)->dest_idx));
4823 tree from_skip = PHI_ARG_DEF_FROM_EDGE (phi, skip_edge)gimple_phi_arg_def (((phi)), ((skip_edge)->dest_idx));
4824
4825 main_loop_results.quick_push (from_main_loop);
4826 initial_values.quick_push (from_skip);
4827 }
4828 }
4829 else
4830 /* The main loop dominates the epilogue loop. */
4831 main_loop_results.splice (reduc_info->reduc_initial_values);
4832
4833 /* See if the main loop has the kind of accumulator we need. */
4834 vect_reusable_accumulator *accumulator
4835 = main_loop_vinfo->reusable_accumulators.get (main_loop_results[0]);
4836 if (!accumulator
4837 || num_phis != accumulator->reduc_info->reduc_scalar_results.length ()
4838 || !std::equal (main_loop_results.begin (), main_loop_results.end (),
4839 accumulator->reduc_info->reduc_scalar_results.begin ()))
4840 return false;
4841
4842 /* Handle the case where we can reduce wider vectors to narrower ones. */
4843 tree vectype = STMT_VINFO_VECTYPE (reduc_info)(reduc_info)->vectype;
4844 tree old_vectype = TREE_TYPE (accumulator->reduc_input);
4845 if (!constant_multiple_p (TYPE_VECTOR_SUBPARTS (old_vectype),
4846 TYPE_VECTOR_SUBPARTS (vectype)))
4847 return false;
4848
4849 /* Non-SLP reductions might apply an adjustment after the reduction
4850 operation, in order to simplify the initialization of the accumulator.
4851 If the epilogue loop carries on from where the main loop left off,
4852 it should apply the same adjustment to the final reduction result.
4853
4854 If the epilogue loop can also be entered directly (rather than via
4855 the main loop), we need to be able to handle that case in the same way,
4856 with the same adjustment. (In principle we could add a PHI node
4857 to select the correct adjustment, but in practice that shouldn't be
4858 necessary.) */
4859 tree main_adjustment
4860 = STMT_VINFO_REDUC_EPILOGUE_ADJUSTMENT (accumulator->reduc_info)(accumulator->reduc_info)->reduc_epilogue_adjustment;
4861 if (loop_vinfo->main_loop_edge && main_adjustment)
4862 {
4863 gcc_assert (num_phis == 1);
4864 tree initial_value = initial_values[0];
4865 /* Check that we can use INITIAL_VALUE as the adjustment and
4866 initialize the accumulator with a neutral value instead. */
4867 if (!operand_equal_p (initial_value, main_adjustment))
4868 return false;
4869 tree_code code = STMT_VINFO_REDUC_CODE (reduc_info)(reduc_info)->reduc_code;
4870 initial_values[0] = neutral_op_for_reduction (TREE_TYPE (initial_value),
4871 code, initial_value);
4872 }
4873 STMT_VINFO_REDUC_EPILOGUE_ADJUSTMENT (reduc_info) = main_adjustment;
4874 reduc_info->reduc_initial_values.truncate (0);
4875 reduc_info->reduc_initial_values.splice (initial_values);
4876 reduc_info->reused_accumulator = accumulator;
4877 return true;
4878}
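/* Illustrative sketch with hypothetical values: for a scalar reduction
   "s = 10; for (...) s += a[i];" the main loop can start its vector
   accumulator at the neutral value {0, ..., 0} and record 10 as the
   epilogue adjustment to be applied once after the final reduction.
   The code above lets an epilogue loop reuse that accumulator by keeping
   the same adjustment and replacing its own initial value (10) with the
   neutral value for the reduction code.  */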
4879
4880/* Reduce the vector VEC_DEF down to VECTYPE with reduction operation
4881 CODE, emitting stmts into SEQ. Returns a vector def of VECTYPE. */
4882
4883static tree
4884vect_create_partial_epilog (tree vec_def, tree vectype, enum tree_code code,
4885 gimple_seq *seq)
4886{
4887 unsigned nunits = TYPE_VECTOR_SUBPARTS (TREE_TYPE (vec_def)).to_constant ();
4888 unsigned nunits1 = TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
4889 tree stype = TREE_TYPE (vectype);
4890 tree new_temp = vec_def;
4891 while (nunits > nunits1)
4892 {
4893 nunits /= 2;
4894 tree vectype1 = get_related_vectype_for_scalar_type (TYPE_MODE (vectype),
4895 stype, nunits);
4896 unsigned int bitsize = tree_to_uhwi (TYPE_SIZE (vectype1));
4897
4898 /* The target has to make sure we support lowpart/highpart
4899 extraction, either via direct vector extract or through
4900 an integer mode punning. */
4901 tree dst1, dst2;
4902 gimple *epilog_stmt;
4903 if (convert_optab_handler (vec_extract_optab,
4904 TYPE_MODE (TREE_TYPE (new_temp)),
4905 TYPE_MODE (vectype1))
4906 != CODE_FOR_nothing)
4907 {
4908 /* Extract sub-vectors directly once vec_extract becomes
4909 a conversion optab. */
4910 dst1 = make_ssa_name (vectype1);
4911 epilog_stmt
4912 = gimple_build_assign (dst1, BIT_FIELD_REF,
4913 build3 (BIT_FIELD_REF, vectype1,
4914 new_temp, TYPE_SIZE (vectype1),
4915 bitsize_int (0)));
4916 gimple_seq_add_stmt_without_update (seq, epilog_stmt);
4917 dst2 = make_ssa_name (vectype1);
4918 epilog_stmt
4919 = gimple_build_assign (dst2, BIT_FIELD_REF,
4920 build3 (BIT_FIELD_REF, vectype1,
4921 new_temp, TYPE_SIZE (vectype1),
4922 bitsize_int (bitsize)));
4923 gimple_seq_add_stmt_without_update (seq, epilog_stmt);
4924 }
4925 else
4926 {
4927 /* Extract via punning to appropriately sized integer mode
4928 vector. */
4929 tree eltype = build_nonstandard_integer_type (bitsize, 1);
4930 tree etype = build_vector_type (eltype, 2);
4931 gcc_assert (convert_optab_handler (vec_extract_optab,
4932 TYPE_MODE (etype),
4933 TYPE_MODE (eltype))
4934 != CODE_FOR_nothing);
4935 tree tem = make_ssa_name (etype);
4936 epilog_stmt = gimple_build_assign (tem, VIEW_CONVERT_EXPR,
4937 build1 (VIEW_CONVERT_EXPR,
4938 etype, new_temp));
4939 gimple_seq_add_stmt_without_update (seq, epilog_stmt);
4940 new_temp = tem;
4941 tem = make_ssa_name (eltype);
4942 epilog_stmt
4943 = gimple_build_assign (tem, BIT_FIELD_REF,
4944 build3 (BIT_FIELD_REF, eltype,
4945 new_temp, TYPE_SIZE (eltype),
4946 bitsize_int (0)));
4947 gimple_seq_add_stmt_without_update (seq, epilog_stmt);
4948 dst1 = make_ssa_name (vectype1);
4949 epilog_stmt = gimple_build_assign (dst1, VIEW_CONVERT_EXPR,
4950 build1 (VIEW_CONVERT_EXPR,
4951 vectype1, tem));
4952 gimple_seq_add_stmt_without_update (seq, epilog_stmt);
4953 tem = make_ssa_name (eltype);
4954 epilog_stmt
4955 = gimple_build_assign (tem, BIT_FIELD_REF,
4956 build3 (BIT_FIELD_REF, eltype,
4957 new_temp, TYPE_SIZE (eltype),
4958 bitsize_int (bitsize)));
4959 gimple_seq_add_stmt_without_update (seq, epilog_stmt);
4960 dst2 = make_ssa_name (vectype1);
4961 epilog_stmt = gimple_build_assign (dst2, VIEW_CONVERT_EXPR,
4962 build1 (VIEW_CONVERT_EXPR,
4963 vectype1, tem));
4964 gimple_seq_add_stmt_without_update (seq, epilog_stmt);
4965 }
4966
4967 new_temp = make_ssa_name (vectype1);
4968 epilog_stmt = gimple_build_assign (new_temp, code, dst1, dst2);
4969 gimple_seq_add_stmt_without_update (seq, epilog_stmt);
4970 }
4971
4972 return new_temp;
4973}
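/* Illustrative sketch with hypothetical values: reducing a four-element
   vector {a0, a1, a2, a3} to a two-element VECTYPE with CODE = PLUS_EXPR
   takes one iteration of the loop above; the low and high halves {a0, a1}
   and {a2, a3} are extracted (directly, or by punning through an integer
   mode) and added, giving {a0 + a2, a1 + a3}.  The halving repeats until
   the requested number of subparts is reached.  */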
4974
4975/* Function vect_create_epilog_for_reduction
4976
4977 Create code at the loop-epilog to finalize the result of a reduction
4978 computation.
4979
4980 STMT_INFO is the scalar reduction stmt that is being vectorized.
4981 SLP_NODE is an SLP node containing a group of reduction statements. The
4982 first one in this group is STMT_INFO.
4983 SLP_NODE_INSTANCE is the SLP node instance containing SLP_NODE
4984 REDUC_INDEX says which rhs operand of the STMT_INFO is the reduction phi
4985 (counting from 0)
4986
4987 This function:
4988 1. Completes the reduction def-use cycles.
4989 2. "Reduces" each vector of partial results VECT_DEFS into a single result,
4990 by calling the function specified by REDUC_FN if available, or by
4991 other means (whole-vector shifts or a scalar loop).
4992 The function also creates a new phi node at the loop exit to preserve
4993 loop-closed form, as illustrated below.
4994
4995 The flow at the entry to this function:
4996
4997 loop:
4998 vec_def = phi <vec_init, null> # REDUCTION_PHI
4999 VECT_DEF = vector_stmt # vectorized form of STMT_INFO
5000 s_loop = scalar_stmt # (scalar) STMT_INFO
5001 loop_exit:
5002 s_out0 = phi <s_loop> # (scalar) EXIT_PHI
5003 use <s_out0>
5004 use <s_out0>
5005
5006 The above is transformed by this function into:
5007
5008 loop:
5009 vec_def = phi <vec_init, VECT_DEF> # REDUCTION_PHI
5010 VECT_DEF = vector_stmt # vectorized form of STMT_INFO
5011 s_loop = scalar_stmt # (scalar) STMT_INFO
5012 loop_exit:
5013 s_out0 = phi <s_loop> # (scalar) EXIT_PHI
5014 v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI
5015 v_out2 = reduce <v_out1>
5016 s_out3 = extract_field <v_out2, 0>
5017 s_out4 = adjust_result <s_out3>
5018 use <s_out4>
5019 use <s_out4>
5020*/
5021
5022static void
5023vect_create_epilog_for_reduction (loop_vec_info loop_vinfo,
5024 stmt_vec_info stmt_info,
5025 slp_tree slp_node,
5026 slp_instance slp_node_instance)
5027{
5028 stmt_vec_info reduc_info = info_for_reduction (loop_vinfo, stmt_info);
5029 gcc_assert (reduc_info->is_reduc_info);
5030 /* For double reductions we need to get at the inner loop reduction
5031 stmt which has the meta info attached. Our stmt_info is that of the
5032 loop-closed PHI of the inner loop which we remember as
5033 def for the reduction PHI generation. */
5034 bool double_reduc = false;
5035 stmt_vec_info rdef_info = stmt_info;
5036 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def)
5037 {
5038 gcc_assert (!slp_node);
5039 double_reduc = true;
5040 stmt_info = loop_vinfo->lookup_def (gimple_phi_arg_def
5041 (stmt_info->stmt, 0));
5042 stmt_info = vect_stmt_to_vectorize (stmt_info);
5043 }
5044 gphi *reduc_def_stmt
5045 = as_a <gphi *> (STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info))->stmt);
5046 enum tree_code code = STMT_VINFO_REDUC_CODE (reduc_info);
5047 internal_fn reduc_fn = STMT_VINFO_REDUC_FN (reduc_info);
5048 tree vectype;
5049 machine_mode mode;
5050 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo), *outer_loop = NULL;
5051 basic_block exit_bb;
5052 tree scalar_dest;
5053 tree scalar_type;
5054 gimple *new_phi = NULL, *phi;
5055 gimple_stmt_iterator exit_gsi;
5056 tree new_temp = NULL_TREE, new_name, new_scalar_dest;
5057 gimple *epilog_stmt = NULL;
5058 gimple *exit_phi;
5059 tree bitsize;
5060 tree def;
5061 tree orig_name, scalar_result;
5062 imm_use_iterator imm_iter, phi_imm_iter;
5063 use_operand_p use_p, phi_use_p;
5064 gimple *use_stmt;
5065 auto_vec<tree> reduc_inputs;
5066 int j, i;
5067 vec<tree> &scalar_results = reduc_info->reduc_scalar_results;
5068 unsigned int group_size = 1, k;
5069 auto_vec<gimple *> phis;
5070 /* SLP reduction without reduction chain, e.g.,
5071 # a1 = phi <a2, a0>
5072 # b1 = phi <b2, b0>
5073 a2 = operation (a1)
5074 b2 = operation (b1) */
5075 bool slp_reduc = (slp_node && !REDUC_GROUP_FIRST_ELEMENT (stmt_info));
5076 bool direct_slp_reduc;
5077 tree induction_index = NULL_TREE;
5078
5079 if (slp_node)
5080 group_size = SLP_TREE_LANES (slp_node);
5081
5082 if (nested_in_vect_loop_p (loop, stmt_info))
5083 {
5084 outer_loop = loop;
5085 loop = loop->inner;
5086 gcc_assert (!slp_node && double_reduc);
5087 }
5088
5089 vectype = STMT_VINFO_REDUC_VECTYPE (reduc_info);
5090 gcc_assert (vectype);
5091 mode = TYPE_MODE (vectype);
5092
5093 tree induc_val = NULL_TREE;
5094 tree adjustment_def = NULL;
5095 if (slp_node)
5096 ;
5097 else
5098 {
5099 /* Optimize: for induction condition reduction, if we can't use zero
5100 for induc_val, use initial_def. */
5101 if (STMT_VINFO_REDUC_TYPE (reduc_info) == INTEGER_INDUC_COND_REDUCTION)
5102 induc_val = STMT_VINFO_VEC_INDUC_COND_INITIAL_VAL (reduc_info);
5103 else if (double_reduc)
5104 ;
5105 else
5106 adjustment_def = STMT_VINFO_REDUC_EPILOGUE_ADJUSTMENT (reduc_info);
5107 }
5108
5109 stmt_vec_info single_live_out_stmt[] = { stmt_info };
5110 array_slice<const stmt_vec_info> live_out_stmts = single_live_out_stmt;
5111 if (slp_reduc)
5112 /* All statements produce live-out values. */
5113 live_out_stmts = SLP_TREE_SCALAR_STMTS (slp_node);
5114 else if (slp_node)
5115 /* The last statement in the reduction chain produces the live-out
5116 value. */
5117 single_live_out_stmt[0] = SLP_TREE_SCALAR_STMTS (slp_node)[group_size - 1];
5118
5119 unsigned vec_num;
5120 int ncopies;
5121 if (slp_node)
5122 {
5123 vec_num = SLP_TREE_VEC_STMTS (slp_node_instance->reduc_phis).length ();
5124 ncopies = 1;
5125 }
5126 else
5127 {
5128 stmt_vec_info reduc_info = loop_vinfo->lookup_stmt (reduc_def_stmt);
5129 vec_num = 1;
5130 ncopies = STMT_VINFO_VEC_STMTS (reduc_info).length ();
5131 }
5132
5133 /* For cond reductions we want to create a new vector (INDEX_COND_EXPR)
5134 which is updated with the current index of the loop for every match of
5135 the original loop's cond_expr (VEC_STMT). This results in a vector
5136 containing the last time the condition passed for that vector lane.
5137 The first match will be a 1 to allow 0 to be used for non-matching
5138 indexes. If there are no matches at all then the vector will be all
5139 zeroes.
5140
5141 PR92772: This algorithm is broken for architectures that support
5142 masked vectors, but do not provide fold_extract_last. */
5143 if (STMT_VINFO_REDUC_TYPE (reduc_info) == COND_REDUCTION)
5144 {
5145 auto_vec<std::pair<tree, bool>, 2> ccompares;
5146 stmt_vec_info cond_info = STMT_VINFO_REDUC_DEF (reduc_info);
5147 cond_info = vect_stmt_to_vectorize (cond_info);
5148 while (cond_info != reduc_info)
5149 {
5150 if (gimple_assign_rhs_code (cond_info->stmt) == COND_EXPR)
5151 {
5152 gimple *vec_stmt = STMT_VINFO_VEC_STMTS (cond_info)[0];
5153 gcc_assert (gimple_assign_rhs_code (vec_stmt) == VEC_COND_EXPR);
5154 ccompares.safe_push
5155 (std::make_pair (unshare_expr (gimple_assign_rhs1 (vec_stmt)),
5156 STMT_VINFO_REDUC_IDX (cond_info) == 2));
5157 }
5158 cond_info
5159 = loop_vinfo->lookup_def (gimple_op (cond_info->stmt,
5160 1 + STMT_VINFO_REDUC_IDX
5161 (cond_info)));
5162 cond_info = vect_stmt_to_vectorize (cond_info);
5163 }
5164 gcc_assert (ccompares.length () != 0);
5165
5166 tree indx_before_incr, indx_after_incr;
5167 poly_uint64 nunits_out = TYPE_VECTOR_SUBPARTS (vectype);
5168 int scalar_precision
5169 = GET_MODE_PRECISION (SCALAR_TYPE_MODE (TREE_TYPE (vectype)));
5170 tree cr_index_scalar_type = make_unsigned_type (scalar_precision);
5171 tree cr_index_vector_type = get_related_vectype_for_scalar_type
5172 (TYPE_MODE (vectype), cr_index_scalar_type,
5173 TYPE_VECTOR_SUBPARTS (vectype));
5174
5175 /* First we create a simple vector induction variable which starts
5176 with the values {1,2,3,...} (SERIES_VECT) and increments by the
5177 vector size (STEP). */
5178
5179 /* Create a {1,2,3,...} vector. */
5180 tree series_vect = build_index_vector (cr_index_vector_type, 1, 1);
5181
5182 /* Create a vector of the step value. */
5183 tree step = build_int_cst (cr_index_scalar_type, nunits_out);
5184 tree vec_step = build_vector_from_val (cr_index_vector_type, step);
5185
5186 /* Create an induction variable. */
5187 gimple_stmt_iterator incr_gsi;
5188 bool insert_after;
5189 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
5190 create_iv (series_vect, vec_step, NULL_TREE, loop, &incr_gsi,
5191 insert_after, &indx_before_incr, &indx_after_incr);
5192
5193 /* Next create a new phi node vector (NEW_PHI_TREE) which starts
5194 filled with zeros (VEC_ZERO). */
5195
5196 /* Create a vector of 0s. */
5197 tree zero = build_zero_cst (cr_index_scalar_type);
5198 tree vec_zero = build_vector_from_val (cr_index_vector_type, zero);
5199
5200 /* Create a vector phi node. */
5201 tree new_phi_tree = make_ssa_name (cr_index_vector_type);
5202 new_phi = create_phi_node (new_phi_tree, loop->header);
5203 add_phi_arg (as_a <gphi *> (new_phi), vec_zero,
5204 loop_preheader_edge (loop), UNKNOWN_LOCATION);
5205
5206 /* Now take the condition from the loops original cond_exprs
5207 and produce a new cond_exprs (INDEX_COND_EXPR) which for
5208 every match uses values from the induction variable
5209 (INDEX_BEFORE_INCR) otherwise uses values from the phi node
5210 (NEW_PHI_TREE).
5211 Finally, we update the phi (NEW_PHI_TREE) to take the value of
5212 the new cond_expr (INDEX_COND_EXPR). */
5213 gimple_seq stmts = NULL;
5214 for (int i = ccompares.length () - 1; i != -1; --i)
5215 {
5216 tree ccompare = ccompares[i].first;
5217 if (ccompares[i].second)
5218 new_phi_tree = gimple_build (&stmts, VEC_COND_EXPR,
5219 cr_index_vector_type,
5220 ccompare,
5221 indx_before_incr, new_phi_tree);
5222 else
5223 new_phi_tree = gimple_build (&stmts, VEC_COND_EXPR,
5224 cr_index_vector_type,
5225 ccompare,
5226 new_phi_tree, indx_before_incr);
5227 }
5228 gsi_insert_seq_before (&incr_gsi, stmts, GSI_SAME_STMT);
5229
5230 /* Update the phi with the vec cond. */
5231 induction_index = new_phi_tree;
5232 add_phi_arg (as_a <gphi *> (new_phi), induction_index,
5233 loop_latch_edge (loop), UNKNOWN_LOCATION);
5234 }
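/* Illustrative sketch with hypothetical values: with four lanes the
   induction variable starts at {1, 2, 3, 4} and is incremented by 4 each
   iteration, so a lane whose condition matches in (0-based) iteration I
   records I * 4 + LANE + 1 and otherwise keeps its previous value.  If
   lane 2 last matched in iteration 1, INDUCTION_INDEX holds 7 in that
   lane; lanes that never matched stay 0, so the overall last match is
   the lane with the maximum value.  */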
5235
5236 /* 2. Create epilog code.
5237 The reduction epilog code operates across the elements of the vector
5238 of partial results computed by the vectorized loop.
5239 The reduction epilog code consists of:
5240
5241 step 1: compute the scalar result in a vector (v_out2)
5242 step 2: extract the scalar result (s_out3) from the vector (v_out2)
5243 step 3: adjust the scalar result (s_out3) if needed.
5244
5245 Step 1 can be accomplished using one the following three schemes:
5246 (scheme 1) using reduc_fn, if available.
5247 (scheme 2) using whole-vector shifts, if available.
5248 (scheme 3) using a scalar loop. In this case steps 1+2 above are
5249 combined.
5250
5251 The overall epilog code looks like this:
5252
5253 s_out0 = phi <s_loop> # original EXIT_PHI
5254 v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI
5255 v_out2 = reduce <v_out1> # step 1
5256 s_out3 = extract_field <v_out2, 0> # step 2
5257 s_out4 = adjust_result <s_out3> # step 3
5258
5259 (step 3 is optional, and steps 1 and 2 may be combined).
5260 Lastly, the uses of s_out0 are replaced by s_out4. */
5261
5262
5263 /* 2.1 Create new loop-exit-phis to preserve loop-closed form:
5264 v_out1 = phi <VECT_DEF>
5265 Store them in NEW_PHIS. */
5266 if (double_reduc)
5267 loop = outer_loop;
5268 exit_bb = single_exit (loop)->dest;
5269 exit_gsi = gsi_after_labels (exit_bb);
5270 reduc_inputs.create (slp_node ? vec_num : ncopies);
5271 for (unsigned i = 0; i < vec_num; i++)
5272 {
5273 gimple_seq stmts = NULL;
5274 if (slp_node)
5275 def = vect_get_slp_vect_def (slp_node, i);
5276 else
5277 def = gimple_get_lhs (STMT_VINFO_VEC_STMTS (rdef_info)[0]);
5278 for (j = 0; j < ncopies; j++)
5279 {
5280 tree new_def = copy_ssa_name (def);
5281 phi = create_phi_node (new_def, exit_bb);
5282 if (j)
5283 def = gimple_get_lhs (STMT_VINFO_VEC_STMTS (rdef_info)[j]);
5284 SET_PHI_ARG_DEF (phi, single_exit (loop)->dest_idx, def);
5285 new_def = gimple_convert (&stmts, vectype, new_def);
5286 reduc_inputs.quick_push (new_def);
5287 }
5288 gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT);
5289 }
5290
5291 /* 2.2 Get the relevant tree-code to use in the epilog for schemes 2,3
5292 (i.e. when reduc_fn is not available) and in the final adjustment
5293 code (if needed). Also get the original scalar reduction variable as
5294 defined in the loop. In case STMT is a "pattern-stmt" (i.e. - it
5295 represents a reduction pattern), the tree-code and scalar-def are
5296 taken from the original stmt that the pattern-stmt (STMT) replaces.
5297 Otherwise (it is a regular reduction) - the tree-code and scalar-def
5298 are taken from STMT. */
5299
5300 stmt_vec_info orig_stmt_info = vect_orig_stmt (stmt_info);
5301 if (orig_stmt_info != stmt_info)
5302 {
5303 /* Reduction pattern */
5304 gcc_assert (STMT_VINFO_IN_PATTERN_P (orig_stmt_info));
5305 gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info) == stmt_info);
5306 }
5307
5308 scalar_dest = gimple_get_lhs (orig_stmt_info->stmt);
5309 scalar_type = TREE_TYPE (scalar_dest);
5310 scalar_results.create (group_size);
5311 new_scalar_dest = vect_create_destination_var (scalar_dest, NULL);
5312 bitsize = TYPE_SIZE (scalar_type);
5313
5314 /* True if we should implement SLP_REDUC using native reduction operations
5315 instead of scalar operations. */
5316 direct_slp_reduc = (reduc_fn != IFN_LAST
5317 && slp_reduc
5318 && !TYPE_VECTOR_SUBPARTS (vectype).is_constant ());
5319
5320 /* In case of reduction chain, e.g.,
5321 # a1 = phi <a3, a0>
5322 a2 = operation (a1)
5323 a3 = operation (a2),
5324
5325 we may end up with more than one vector result. Here we reduce them
5326 to one vector.
5327
5328 The same is true if we couldn't use a single defuse cycle. */
5329 if (REDUC_GROUP_FIRST_ELEMENT (stmt_info)
5330 || direct_slp_reduc
5331 || ncopies > 1)
5332 {
5333 gimple_seq stmts = NULL;
5334 tree single_input = reduc_inputs[0];
5335 for (k = 1; k < reduc_inputs.length (); k++)
5336 single_input = gimple_build (&stmts, code, vectype,
5337 single_input, reduc_inputs[k]);
5338 gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT);
5339
5340 reduc_inputs.truncate (0);
5341 reduc_inputs.safe_push (single_input);
5342 }
5343
5344 tree orig_reduc_input = reduc_inputs[0];
5345
5346 /* If this loop is an epilogue loop that can be skipped after the
5347 main loop, we can only share a reduction operation between the
5348 main loop and the epilogue if we put it at the target of the
5349 skip edge.
5350
5351 We can still reuse accumulators if this check fails. Doing so has
5352 the minor(?) benefit of making the epilogue loop's scalar result
5353 independent of the main loop's scalar result. */
5354 bool unify_with_main_loop_p = false;
5355 if (reduc_info->reused_accumulator
5356 && loop_vinfo->skip_this_loop_edge
5357 && single_succ_p (exit_bb)
5358 && single_succ (exit_bb) == loop_vinfo->skip_this_loop_edge->dest)
5359 {
5360 unify_with_main_loop_p = true;
5361
5362 basic_block reduc_block = loop_vinfo->skip_this_loop_edge->dest;
5363 reduc_inputs[0] = make_ssa_name (vectype);
5364 gphi *new_phi = create_phi_node (reduc_inputs[0], reduc_block);
5365 add_phi_arg (new_phi, orig_reduc_input, single_succ_edge (exit_bb),
5366 UNKNOWN_LOCATION);
5367 add_phi_arg (new_phi, reduc_info->reused_accumulator->reduc_input,
5368 loop_vinfo->skip_this_loop_edge, UNKNOWN_LOCATION);
5369 exit_gsi = gsi_after_labels (reduc_block);
5370 }
5371
5372 /* Shouldn't be used beyond this point. */
5373 exit_bb = nullptr;
5374
5375 if (STMT_VINFO_REDUC_TYPE (reduc_info) == COND_REDUCTION
5376 && reduc_fn != IFN_LAST)
5377 {
5378 /* For condition reductions, we have a vector (REDUC_INPUTS 0) containing
5379 various data values where the condition matched and another vector
5380 (INDUCTION_INDEX) containing all the indexes of those matches. We
5381 need to extract the last matching index (which will be the index with
5382 highest value) and use this to index into the data vector.
5383 For the case where there were no matches, the data vector will contain
5384 all default values and the index vector will be all zeros. */
5385
5386 /* Get various versions of the type of the vector of indexes. */
5387 tree index_vec_type = TREE_TYPE (induction_index);
5388 gcc_checking_assert (TYPE_UNSIGNED (index_vec_type));
5389 tree index_scalar_type = TREE_TYPE (index_vec_type);
5390 tree index_vec_cmp_type = truth_type_for (index_vec_type);
5391
5392 /* Get an unsigned integer version of the type of the data vector. */
5393 int scalar_precision
5394 = GET_MODE_PRECISION (SCALAR_TYPE_MODE (scalar_type));
5395 tree scalar_type_unsigned = make_unsigned_type (scalar_precision);
5396 tree vectype_unsigned = get_same_sized_vectype (scalar_type_unsigned,
5397 vectype);
5398
5399 /* First we need to create a vector (ZERO_VEC) of zeros and another
5400 vector (MAX_INDEX_VEC) filled with the last matching index, which we
5401 can create using a MAX reduction and then expanding.
5402 In the case where the loop never made any matches, the max index will
5403 be zero. */
5404
5405 /* Vector of {0, 0, 0,...}. */
5406 tree zero_vec = build_zero_cst (vectype);
5407
5408 /* Find maximum value from the vector of found indexes. */
5409 tree max_index = make_ssa_name (index_scalar_type);
5410 gcall *max_index_stmt = gimple_build_call_internal (IFN_REDUC_MAX,
5411 1, induction_index);
5412 gimple_call_set_lhs (max_index_stmt, max_index);
5413 gsi_insert_before (&exit_gsi, max_index_stmt, GSI_SAME_STMT);
5414
5415 /* Vector of {max_index, max_index, max_index,...}. */
5416 tree max_index_vec = make_ssa_name (index_vec_type);
5417 tree max_index_vec_rhs = build_vector_from_val (index_vec_type,
5418 max_index);
5419 gimple *max_index_vec_stmt = gimple_build_assign (max_index_vec,
5420 max_index_vec_rhs);
5421 gsi_insert_before (&exit_gsi, max_index_vec_stmt, GSI_SAME_STMT);
5422
5423 /* Next we compare the new vector (MAX_INDEX_VEC) full of max indexes
5424 with the vector (INDUCTION_INDEX) of found indexes, choosing values
5425 from the data vector (REDUC_INPUTS 0) for matches, 0 (ZERO_VEC)
5426 otherwise. Only one value should match, resulting in a vector
5427 (VEC_COND) with one data value and the rest zeros.
5428 In the case where the loop never made any matches, every index will
5429 match, resulting in a vector with all data values (which will all be
5430 the default value). */
5431
5432 /* Compare the max index vector to the vector of found indexes to find
5433 the position of the max value. */
5434 tree vec_compare = make_ssa_name (index_vec_cmp_type);
5435 gimple *vec_compare_stmt = gimple_build_assign (vec_compare, EQ_EXPR,
5436 induction_index,
5437 max_index_vec);
5438 gsi_insert_before (&exit_gsi, vec_compare_stmt, GSI_SAME_STMT);
5439
5440 /* Use the compare to choose either values from the data vector or
5441 zero. */
5442 tree vec_cond = make_ssa_name (vectype);
5443 gimple *vec_cond_stmt = gimple_build_assign (vec_cond, VEC_COND_EXPR,
5444 vec_compare,
5445 reduc_inputs[0],
5446 zero_vec);
5447 gsi_insert_before (&exit_gsi, vec_cond_stmt, GSI_SAME_STMT);
5448
5449 /* Finally we need to extract the data value from the vector (VEC_COND)
5450 into a scalar (MATCHED_DATA_REDUC). Logically we want to do a OR
5451 reduction, but because this doesn't exist, we can use a MAX reduction
5452 instead. The data value might be signed or a float so we need to cast
5453 it first.
5454 In the case where the loop never made any matches, the data values are
5455 all identical, and so will reduce down correctly. */
5456
5457 /* Make the matched data values unsigned. */
5458 tree vec_cond_cast = make_ssa_name (vectype_unsigned);
5459 tree vec_cond_cast_rhs = build1 (VIEW_CONVERT_EXPR, vectype_unsigned,
5460 vec_cond);
5461 gimple *vec_cond_cast_stmt = gimple_build_assign (vec_cond_cast,
5462 VIEW_CONVERT_EXPR,
5463 vec_cond_cast_rhs);
5464 gsi_insert_before (&exit_gsi, vec_cond_cast_stmt, GSI_SAME_STMT);
5465
5466 /* Reduce down to a scalar value. */
5467 tree data_reduc = make_ssa_name (scalar_type_unsigned);
5468 gcall *data_reduc_stmt = gimple_build_call_internal (IFN_REDUC_MAX,
5469 1, vec_cond_cast);
5470 gimple_call_set_lhs (data_reduc_stmt, data_reduc);
5471 gsi_insert_before (&exit_gsi, data_reduc_stmt, GSI_SAME_STMT);
5472
5473 /* Convert the reduced value back to the result type and set as the
5474 result. */
5475 gimple_seq stmts = NULL;
5476 new_temp = gimple_build (&stmts, VIEW_CONVERT_EXPR, scalar_type,
5477 data_reduc);
5478 gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT);
5479 scalar_results.safe_push (new_temp);
5480 }
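/* Illustrative sketch with hypothetical values: if INDUCTION_INDEX is
   {0, 7, 0, 3}, IFN_REDUC_MAX yields 7, the comparison selects only
   lane 1, and the VEC_COND keeps that lane's data value with zeros
   elsewhere, so the final unsigned IFN_REDUC_MAX extracts exactly that
   value.  If no lane ever matched, every lane compares equal to the
   all-zero maximum and holds the default value, so the result is still
   the default value.  */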
5481 else if (STMT_VINFO_REDUC_TYPE (reduc_info) == COND_REDUCTION
5482 && reduc_fn == IFN_LAST)
5483 {
5484 /* Condition reduction without supported IFN_REDUC_MAX. Generate
5485 idx = 0;
5486 idx_val = induction_index[0];
5487 val = data_reduc[0];
5488 for (idx = 0, val = init, i = 0; i < nelts; ++i)
5489 if (induction_index[i] > idx_val)
5490 val = data_reduc[i], idx_val = induction_index[i];
5491 return val; */
5492
5493 tree data_eltype = TREE_TYPE (vectype);
5494 tree idx_eltype = TREE_TYPE (TREE_TYPE (induction_index));
5495 unsigned HOST_WIDE_INT el_size = tree_to_uhwi (TYPE_SIZE (idx_eltype));
5496 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (TREE_TYPE (induction_index));
5497 /* Enforced by vectorizable_reduction, which ensures we have target
5498 support before allowing a conditional reduction on variable-length
5499 vectors. */
5500 unsigned HOST_WIDE_INT v_size = el_size * nunits.to_constant ();
5501 tree idx_val = NULL_TREE, val = NULL_TREE;
5502 for (unsigned HOST_WIDE_INT off = 0; off < v_size; off += el_size)
5503 {
5504 tree old_idx_val = idx_val;
5505 tree old_val = val;
5506 idx_val = make_ssa_name (idx_eltype);
5507 epilog_stmt = gimple_build_assign (idx_val, BIT_FIELD_REF,
5508 build3 (BIT_FIELD_REF, idx_eltype,
5509 induction_index,
5510 bitsize_int (el_size),
5511 bitsize_int (off)));
5512 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
5513 val = make_ssa_name (data_eltype);
5514 epilog_stmt = gimple_build_assign (val, BIT_FIELD_REF,
5515 build3 (BIT_FIELD_REF,
5516 data_eltype,
5517 reduc_inputs[0],
5518 bitsize_int (el_size),
5519 bitsize_int (off)));
5520 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
5521 if (off != 0)
5522 {
5523 tree new_idx_val = idx_val;
5524 if (off != v_size - el_size)
5525 {
5526 new_idx_val = make_ssa_name (idx_eltype);
5527 epilog_stmt = gimple_build_assign (new_idx_val,
5528 MAX_EXPR, idx_val,
5529 old_idx_val);
5530 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
5531 }
5532 tree new_val = make_ssa_name (data_eltype);
5533 epilog_stmt = gimple_build_assign (new_val,
5534 COND_EXPR,
5535 build2 (GT_EXPR,
5536 boolean_type_node,
5537 idx_val,
5538 old_idx_val),
5539 val, old_val);
5540 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
5541 idx_val = new_idx_val;
5542 val = new_val;
5543 }
5544 }
5545 /* Convert the reduced value back to the result type and set as the
5546 result. */
5547 gimple_seq stmts = NULL;
5548 val = gimple_convert (&stmts, scalar_type, val);
5549 gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT);
5550 scalar_results.safe_push (val);
5551 }
5552
5553 /* 2.3 Create the reduction code, using one of the three schemes described
5554 above. In SLP we simply need to extract all the elements from the
5555 vector (without reducing them), so we use scalar shifts. */
5556 else if (reduc_fn != IFN_LAST && !slp_reduc)
5557 {
5558 tree tmp;
5559 tree vec_elem_type;
5560
5561 /* Case 1: Create:
5562 v_out2 = reduc_expr <v_out1> */
5563
5564 if (dump_enabled_p ())
5565 dump_printf_loc (MSG_NOTE, vect_location,
5566 "Reduce using direct vector reduction.\n");
5567
5568 gimple_seq stmts = NULL;
5569 vec_elem_type = TREE_TYPE (vectype);
5570 new_temp = gimple_build (&stmts, as_combined_fn (reduc_fn),
5571 vec_elem_type, reduc_inputs[0]);
5572 new_temp = gimple_convert (&stmts, scalar_type, new_temp);
5573 gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT);
5574
5575 if ((STMT_VINFO_REDUC_TYPE (reduc_info) == INTEGER_INDUC_COND_REDUCTION)
5576 && induc_val)
5577 {
5578 /* Earlier we set the initial value to be a vector of induc_val
5579 values. Check the result and if it is induc_val then replace
5580 with the original initial value, unless induc_val is
5581 the same as initial_def already. */
5582 tree zcompare = build2 (EQ_EXPR, boolean_type_node, new_temp,
5583 induc_val);
5584 tree initial_def = reduc_info->reduc_initial_values[0];
5585
5586 tmp = make_ssa_name (new_scalar_dest);
5587 epilog_stmt = gimple_build_assign (tmp, COND_EXPR, zcompare,
5588 initial_def, new_temp);
5589 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
5590 new_temp = tmp;
5591 }
5592
5593 scalar_results.safe_push (new_temp);
5594 }
5595 else if (direct_slp_reduc)
5596 {
5597 /* Here we create one vector for each of the REDUC_GROUP_SIZE results,
5598 with the elements for other SLP statements replaced with the
5599 neutral value. We can then do a normal reduction on each vector. */
5600
5601 /* Enforced by vectorizable_reduction. */
5602 gcc_assert (reduc_inputs.length () == 1);
5603 gcc_assert (pow2p_hwi (group_size));
5604
5605 gimple_seq seq = NULL;
5606
5607 /* Build a vector {0, 1, 2, ...}, with the same number of elements
5608 and the same element size as VECTYPE. */
5609 tree index = build_index_vector (vectype, 0, 1);
5610 tree index_type = TREE_TYPE (index);
5611 tree index_elt_type = TREE_TYPE (index_type);
5612 tree mask_type = truth_type_for (index_type);
5613
5614 /* Create a vector that, for each element, identifies which of
5615 the REDUC_GROUP_SIZE results should use it. */
5616 tree index_mask = build_int_cst (index_elt_type, group_size - 1);
5617 index = gimple_build (&seq, BIT_AND_EXPR, index_type, index,
5618 build_vector_from_val (index_type, index_mask));
5619
5620 /* Get a neutral vector value. This is simply a splat of the neutral
5621 scalar value if we have one, otherwise the initial scalar value
5622 is itself a neutral value. */
5623 tree vector_identity = NULL_TREE;
5624 tree neutral_op = NULL_TREE;
5625 if (slp_node)
5626 {
5627 tree initial_value = NULL_TREE;
5628 if (REDUC_GROUP_FIRST_ELEMENT (stmt_info))
5629 initial_value = reduc_info->reduc_initial_values[0];
5630 neutral_op = neutral_op_for_reduction (TREE_TYPE (vectype), code,
5631 initial_value);
5632 }
5633 if (neutral_op)
5634 vector_identity = gimple_build_vector_from_val (&seq, vectype,
5635 neutral_op);
5636 for (unsigned int i = 0; i < group_size; ++i)
5637 {
5638 /* If there's no universal neutral value, we can use the
5639 initial scalar value from the original PHI. This is used
5640 for MIN and MAX reduction, for example. */
5641 if (!neutral_op)
5642 {
5643 tree scalar_value = reduc_info->reduc_initial_values[i];
5644 scalar_value = gimple_convert (&seq, TREE_TYPE (vectype),
5645 scalar_value);
5646 vector_identity = gimple_build_vector_from_val (&seq, vectype,
5647 scalar_value);
5648 }
5649
5650 /* Calculate the equivalent of:
5651
5652 sel[j] = (index[j] == i);
5653
5654 which selects the elements of REDUC_INPUTS[0] that should
5655 be included in the result. */
5656 tree compare_val = build_int_cst (index_elt_type, i);
5657 compare_val = build_vector_from_val (index_type, compare_val);
5658 tree sel = gimple_build (&seq, EQ_EXPR, mask_type,
5659 index, compare_val);
5660
5661 /* Calculate the equivalent of:
5662
5663 vec = seq ? reduc_inputs[0] : vector_identity;
5664
5665 VEC is now suitable for a full vector reduction. */
5666 tree vec = gimple_build (&seq, VEC_COND_EXPR, vectype,
5667 sel, reduc_inputs[0], vector_identity);
5668
5669 /* Do the reduction and convert it to the appropriate type. */
5670 tree scalar = gimple_build (&seq, as_combined_fn (reduc_fn),
5671 TREE_TYPE (vectype), vec);
5672 scalar = gimple_convert (&seq, scalar_type, scalar);
5673 scalar_results.safe_push (scalar);
5674 }
5675 gsi_insert_seq_before (&exit_gsi, seq, GSI_SAME_STMT);
5676 }
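/* Illustrative sketch with hypothetical values: with GROUP_SIZE == 2 and
   an eight-lane vector, INDEX becomes {0, 1, 0, 1, 0, 1, 0, 1} after the
   BIT_AND.  Iteration i == 0 then selects the even lanes (the first SLP
   statement) and i == 1 the odd lanes, substituting the neutral or
   initial value in the unselected lanes, so one full-vector reduction
   per iteration produces one scalar result per SLP statement.  */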
5677 else
5678 {
5679 bool reduce_with_shift;
5680 tree vec_temp;
5681
5682 gcc_assert (slp_reduc || reduc_inputs.length () == 1);
5683
5684 /* See if the target wants to do the final (shift) reduction
5685 in a vector mode of smaller size and first reduce upper/lower
5686 halves against each other. */
5687 enum machine_mode mode1 = mode;
5688 tree stype = TREE_TYPE (vectype);
5689 unsigned nunits = TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
5690 unsigned nunits1 = nunits;
5691 if ((mode1 = targetm.vectorize.split_reduction (mode)) != mode
5692 && reduc_inputs.length () == 1)
5693 {
5694 nunits1 = GET_MODE_NUNITS (mode1).to_constant ();
5695 /* For SLP reductions we have to make sure lanes match up, but
5696 since we're doing individual element final reduction reducing
5697 vector width here is even more important.
5698 ??? We can also separate lanes with permutes, for the common
5699 case of power-of-two group-size odd/even extracts would work. */
5700 if (slp_reduc && nunits != nunits1)
5701 {
5702 nunits1 = least_common_multiple (nunits1, group_size);
5703 gcc_assert (exact_log2 (nunits1) != -1 && nunits1 <= nunits);
5704 }
5705 }
5706 if (!slp_reduc
5707 && (mode1 = targetm.vectorize.split_reduction (mode)) != mode)
5708 nunits1 = GET_MODE_NUNITS (mode1).to_constant ();
5709
5710 tree vectype1 = get_related_vectype_for_scalar_type (TYPE_MODE (vectype),
5711 stype, nunits1);
5712 reduce_with_shift = have_whole_vector_shift (mode1);
5713 if (!VECTOR_MODE_P (mode1))
5714 reduce_with_shift = false;
5715 else
5716 {
5717 optab optab = optab_for_tree_code (code, vectype1, optab_default);
5718 if (optab_handler (optab, mode1) == CODE_FOR_nothing)
5719 reduce_with_shift = false;
5720 }
5721
5722 /* First reduce the vector to the desired vector size we should
5723 do shift reduction on by combining upper and lower halves. */
5724 gimple_seq stmts = NULL;
5725 new_temp = vect_create_partial_epilog (reduc_inputs[0], vectype1,
5726 code, &stmts);
5727 gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT);
5728 reduc_inputs[0] = new_temp;
5729
5730 if (reduce_with_shift && !slp_reduc)
5731 {
5732 int element_bitsize = tree_to_uhwi (bitsize);
5733 /* Enforced by vectorizable_reduction, which disallows SLP reductions
5734 for variable-length vectors and also requires direct target support
5735 for loop reductions. */
5736 int vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype1));
5737 int nelements = vec_size_in_bits / element_bitsize;
5738 vec_perm_builder sel;
5739 vec_perm_indices indices;
5740
5741 int elt_offset;
5742
5743 tree zero_vec = build_zero_cst (vectype1);
5744 /* Case 2: Create:
5745 for (offset = nelements/2; offset >= 1; offset/=2)
5746 {
5747 Create: va' = vec_shift <va, offset>
5748 Create: va = vop <va, va'>
5749 } */
5750
5751 tree rhs;
5752
5753 if (dump_enabled_p ())
5754 dump_printf_loc (MSG_NOTE, vect_location,
5755 "Reduce using vector shifts\n");
5756
5757 gimple_seq stmts = NULL;
5758 new_temp = gimple_convert (&stmts, vectype1, new_temp);
5759 for (elt_offset = nelements / 2;
5760 elt_offset >= 1;
5761 elt_offset /= 2)
5762 {
5763 calc_vec_perm_mask_for_shift (elt_offset, nelements, &sel);
5764 indices.new_vector (sel, 2, nelements);
5765 tree mask = vect_gen_perm_mask_any (vectype1, indices);
5766 new_name = gimple_build (&stmts, VEC_PERM_EXPR, vectype1,
5767 new_temp, zero_vec, mask);
5768 new_temp = gimple_build (&stmts, code,
5769 vectype1, new_name, new_temp);
5770 }
5771 gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT);
5772
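/* Illustrative sketch with hypothetical values for the shift reduction
   above, with four elements {a0, a1, a2, a3} and CODE = PLUS_EXPR:
   offset 2: shift in zeros -> {a2, a3, 0, 0}, add -> {a0+a2, a1+a3, ., .}
   offset 1: shift -> {a1+a3, ., 0, 0}, add -> {a0+a1+a2+a3, ., ., .}
   after which the scalar result is extracted from element 0 (step 2.4
   below).  */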
5773 /* 2.4 Extract the final scalar result. Create:
5774 s_out3 = extract_field <v_out2, b