File: build/gcc/tree-vect-data-refs.cc
Warning: line 4244, column 24: Assigned value is garbage or undefined
/* Data References Analysis and Manipulation Utilities for Vectorization.
   Copyright (C) 2003-2023 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "predict.h"
#include "memmodel.h"
#include "tm_p.h"
#include "ssa.h"
#include "optabs-tree.h"
#include "cgraph.h"
#include "dumpfile.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "tree-eh.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "tree-ssa-loop-ivopts.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop.h"
#include "cfgloop.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "expr.h"
#include "builtins.h"
#include "tree-cfg.h"
#include "tree-hash-traits.h"
#include "vec-perm-indices.h"
#include "internal-fn.h"
#include "gimple-fold.h"

/* Return true if load- or store-lanes optab OPTAB is implemented for
   COUNT vectors of type VECTYPE.  NAME is the name of OPTAB.  */

static bool
vect_lanes_optab_supported_p (const char *name, convert_optab optab,
                              tree vectype, unsigned HOST_WIDE_INT count)
{
  machine_mode mode, array_mode;
  bool limit_p;

  mode = TYPE_MODE (vectype);
  if (!targetm.array_mode (mode, count).exists (&array_mode))
    {
      poly_uint64 bits = count * GET_MODE_BITSIZE (mode);
      limit_p = !targetm.array_mode_supported_p (mode, count);
      if (!int_mode_for_size (bits, limit_p).exists (&array_mode))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "no array mode for %s[%wu]\n",
                             GET_MODE_NAME (mode), count);
          return false;
        }
    }

  if (convert_optab_handler (optab, array_mode, mode) == CODE_FOR_nothing)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "cannot use %s<%s><%s>\n", name,
                         GET_MODE_NAME (array_mode), GET_MODE_NAME (mode));
      return false;
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "can use %s<%s><%s>\n", name, GET_MODE_NAME (array_mode),
                     GET_MODE_NAME (mode));

  return true;
}
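
/* Illustrative example (added for exposition, not in the original
   source): with a 128-bit vector mode such as V4SI and COUNT == 2 on
   a target without a dedicated array mode, the fallback above asks
   int_mode_for_size for a 2 * 128 = 256-bit integer mode; the
   load/store-lanes optab is then usable only if convert_optab_handler
   reports a handler for that (array mode, vector mode) pair.  */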


/* Return the smallest scalar part of STMT_INFO.
   This is used to determine the vectype of the stmt.  We generally set the
   vectype according to the type of the result (lhs).  For stmts whose
   result-type is different from the type of the arguments (e.g., demotion,
   promotion), vectype will be reset appropriately (later).  Note that we have
   to visit the smallest datatype in this function, because that determines
   the VF.  If the smallest datatype in the loop is present only as the rhs
   of a promotion operation, we'd miss it.
   Such a case, where a variable of this datatype does not appear in the lhs
   anywhere in the loop, can only occur if it's an invariant: e.g.:
   'int_x = (int) short_inv', which we'd expect to have been optimized away by
   invariant motion.  However, we cannot rely on invariant motion to always
   take invariants out of the loop, and so in the case of promotion we also
   have to check the rhs.  */

tree
vect_get_smallest_scalar_type (stmt_vec_info stmt_info, tree scalar_type)
{
  HOST_WIDE_INT lhs, rhs;

  /* During the analysis phase, this function is called on arbitrary
     statements that might not have scalar results.  */
  if (!tree_fits_uhwi_p (TYPE_SIZE_UNIT (scalar_type)))
    return scalar_type;

  lhs = rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));

  gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
  if (assign)
    {
      scalar_type = TREE_TYPE (gimple_assign_lhs (assign));
      if (gimple_assign_cast_p (assign)
          || gimple_assign_rhs_code (assign) == DOT_PROD_EXPR
          || gimple_assign_rhs_code (assign) == WIDEN_SUM_EXPR
          || gimple_assign_rhs_code (assign) == WIDEN_MULT_EXPR
          || gimple_assign_rhs_code (assign) == WIDEN_LSHIFT_EXPR
          || gimple_assign_rhs_code (assign) == WIDEN_PLUS_EXPR
          || gimple_assign_rhs_code (assign) == WIDEN_MINUS_EXPR
          || gimple_assign_rhs_code (assign) == FLOAT_EXPR)
        {
          tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (assign));

          rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type));
          if (rhs < lhs)
            scalar_type = rhs_type;
        }
    }
  else if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
    {
      unsigned int i = 0;
      if (gimple_call_internal_p (call))
        {
          internal_fn ifn = gimple_call_internal_fn (call);
          if (internal_load_fn_p (ifn))
            /* For loads the LHS type does the trick.  */
            i = ~0U;
          else if (internal_store_fn_p (ifn))
            {
              /* For stores use the type of the stored value.  */
              i = internal_fn_stored_value_index (ifn);
              scalar_type = TREE_TYPE (gimple_call_arg (call, i));
              i = ~0U;
            }
          else if (internal_fn_mask_index (ifn) == 0)
            i = 1;
        }
      if (i < gimple_call_num_args (call))
        {
          tree rhs_type = TREE_TYPE (gimple_call_arg (call, i));
          if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (rhs_type)))
            {
              rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type));
              if (rhs < lhs)
                scalar_type = rhs_type;
            }
        }
    }

  return scalar_type;
}
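
/* Illustrative example (added for exposition): for a widening multiply
     int_prod = (int) short_a * (int) short_b;   (WIDEN_MULT_EXPR)
   the lhs type is 4 bytes but the rhs1 type is 2 bytes, so the
   function returns the 2-byte 'short' type and the vectorization
   factor is derived from the smaller element.  */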


/* Insert DDR into LOOP_VINFO list of ddrs that may alias and need to be
   tested at run-time.  Return TRUE if DDR was successfully inserted.
   Return false if versioning is not supported.  */

static opt_result
vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo)
{
  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

  if ((unsigned) param_vect_max_version_for_alias_checks == 0)
    return opt_result::failure_at (vect_location,
                                   "will not create alias checks, as"
                                   " --param vect-max-version-for-alias-checks"
                                   " == 0\n");

  opt_result res
    = runtime_alias_check_p (ddr, loop,
                             optimize_loop_nest_for_speed_p (loop));
  if (!res)
    return res;

  LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo).safe_push (ddr);
  return opt_result::success ();
}
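
/* Sketch (added for exposition, not the exact code emitted): a DDR
   recorded here is later turned into a run-time guard of roughly the
   form
       if (a + len <= b || b + len <= a)
         ... vectorized loop ...
       else
         ... scalar fallback loop ...
   so the vector copy only runs when the two access ranges provably do
   not overlap.  */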

/* Record that loop LOOP_VINFO needs to check that VALUE is nonzero.  */

static void
vect_check_nonzero_value (loop_vec_info loop_vinfo, tree value)
{
  const vec<tree> &checks = LOOP_VINFO_CHECK_NONZERO (loop_vinfo);
  for (unsigned int i = 0; i < checks.length (); ++i)
    if (checks[i] == value)
      return;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "need run-time check that %T is nonzero\n",
                     value);
  LOOP_VINFO_CHECK_NONZERO (loop_vinfo).safe_push (value);
}

/* Return true if we know that the order of vectorized DR_INFO_A and
   vectorized DR_INFO_B will be the same as the order of DR_INFO_A and
   DR_INFO_B.  At least one of the accesses is a write.  */

static bool
vect_preserves_scalar_order_p (dr_vec_info *dr_info_a, dr_vec_info *dr_info_b)
{
  stmt_vec_info stmtinfo_a = dr_info_a->stmt;
  stmt_vec_info stmtinfo_b = dr_info_b->stmt;

  /* Single statements are always kept in their original order.  */
  if (!STMT_VINFO_GROUPED_ACCESS (stmtinfo_a)
      && !STMT_VINFO_GROUPED_ACCESS (stmtinfo_b))
    return true;

  /* STMT_A and STMT_B belong to overlapping groups.  All loads are
     emitted at the position of the first scalar load.
     Stores in a group are emitted at the position of the last scalar store.
     Compute that position and check whether the resulting order matches
     the current one.  */
  stmt_vec_info il_a = DR_GROUP_FIRST_ELEMENT (stmtinfo_a);
  if (il_a)
    {
      if (DR_IS_WRITE (STMT_VINFO_DATA_REF (stmtinfo_a)))
        for (stmt_vec_info s = DR_GROUP_NEXT_ELEMENT (il_a); s;
             s = DR_GROUP_NEXT_ELEMENT (s))
          il_a = get_later_stmt (il_a, s);
      else /* DR_IS_READ */
        for (stmt_vec_info s = DR_GROUP_NEXT_ELEMENT (il_a); s;
             s = DR_GROUP_NEXT_ELEMENT (s))
          if (get_later_stmt (il_a, s) == il_a)
            il_a = s;
    }
  else
    il_a = stmtinfo_a;
  stmt_vec_info il_b = DR_GROUP_FIRST_ELEMENT (stmtinfo_b);
  if (il_b)
    {
      if (DR_IS_WRITE (STMT_VINFO_DATA_REF (stmtinfo_b)))
        for (stmt_vec_info s = DR_GROUP_NEXT_ELEMENT (il_b); s;
             s = DR_GROUP_NEXT_ELEMENT (s))
          il_b = get_later_stmt (il_b, s);
      else /* DR_IS_READ */
        for (stmt_vec_info s = DR_GROUP_NEXT_ELEMENT (il_b); s;
             s = DR_GROUP_NEXT_ELEMENT (s))
          if (get_later_stmt (il_b, s) == il_b)
            il_b = s;
    }
  else
    il_b = stmtinfo_b;
  bool a_after_b = (get_later_stmt (stmtinfo_a, stmtinfo_b) == stmtinfo_a);
  return (get_later_stmt (il_a, il_b) == il_a) == a_after_b;
}
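
/* Illustrative example (added for exposition): for the scalar sequence
       x = a[i];  *p = t;  y = a[i+1];
   the group load of { a[i], a[i+1] } is emitted at the position of the
   first scalar load, effectively hoisting the load of a[i+1] above the
   store to *p.  This function detects that such motion would change
   the relative load/store order of the scalar code.  */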

/* A subroutine of vect_analyze_data_ref_dependence.  Handle a
   DDR_COULD_BE_INDEPENDENT_P DDR that has a known set of dependence
   distances.  These distances are conservatively correct but they don't
   reflect a guaranteed dependence.

   Return true if this function does all the work necessary to avoid
   an alias or false if the caller should use the dependence distances
   to limit the vectorization factor in the usual way.  LOOP_DEPTH is
   the depth of the loop described by LOOP_VINFO and the other arguments
   are as for vect_analyze_data_ref_dependence.  */

static bool
vect_analyze_possibly_independent_ddr (data_dependence_relation *ddr,
                                       loop_vec_info loop_vinfo,
                                       int loop_depth, unsigned int *max_vf)
{
  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  for (lambda_vector &dist_v : DDR_DIST_VECTS (ddr))
    {
      int dist = dist_v[loop_depth];
      if (dist != 0 && !(dist > 0 && DDR_REVERSED_P (ddr)))
        {
          /* If the user asserted safelen >= DIST consecutive iterations
             can be executed concurrently, assume independence.

             ??? An alternative would be to add the alias check even
             in this case, and vectorize the fallback loop with the
             maximum VF set to safelen.  However, if the user has
             explicitly given a length, it's less likely that that
             would be a win.  */
          if (loop->safelen >= 2 && abs_hwi (dist) <= loop->safelen)
            {
              if ((unsigned int) loop->safelen < *max_vf)
                *max_vf = loop->safelen;
              LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo) = false;
              continue;
            }

          /* For dependence distances of 2 or more, we have the option
             of limiting VF or checking for an alias at runtime.
             Prefer to check at runtime if we can, to avoid limiting
             the VF unnecessarily when the bases are in fact independent.

             Note that the alias checks will be removed if the VF ends up
             being small enough.  */
          dr_vec_info *dr_info_a = loop_vinfo->lookup_dr (DDR_A (ddr));
          dr_vec_info *dr_info_b = loop_vinfo->lookup_dr (DDR_B (ddr));
          return (!STMT_VINFO_GATHER_SCATTER_P (dr_info_a->stmt)
                  && !STMT_VINFO_GATHER_SCATTER_P (dr_info_b->stmt)
                  && vect_mark_for_runtime_alias_test (ddr, loop_vinfo));
        }
    }
  return true;
}
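
/* E.g. (added for exposition): with '#pragma omp simd safelen(8)' on
   the source loop, a known dependence distance of at most 8 is assumed
   not to matter here; *max_vf is merely capped at 8 rather than the
   distance forcing an alias check or a smaller VF.  */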


/* Function vect_analyze_data_ref_dependence.

   Return a failure result if there (might) exist an unhandled dependence
   between the memory references DRA and DRB; return success when the
   dependence is harmless for vectorization or when versioning for alias
   will check it at run-time.  Adjust *MAX_VF according to the data
   dependence.  */

static opt_result
vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
                                  loop_vec_info loop_vinfo,
                                  unsigned int *max_vf)
{
  unsigned int i;
  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  struct data_reference *dra = DDR_A (ddr);
  struct data_reference *drb = DDR_B (ddr);
  dr_vec_info *dr_info_a = loop_vinfo->lookup_dr (dra);
  dr_vec_info *dr_info_b = loop_vinfo->lookup_dr (drb);
  stmt_vec_info stmtinfo_a = dr_info_a->stmt;
  stmt_vec_info stmtinfo_b = dr_info_b->stmt;
  lambda_vector dist_v;
  unsigned int loop_depth;

  /* If user asserted safelen consecutive iterations can be
     executed concurrently, assume independence.  */
  auto apply_safelen = [&]()
    {
      if (loop->safelen >= 2)
        {
          if ((unsigned int) loop->safelen < *max_vf)
            *max_vf = loop->safelen;
          LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo) = false;
          return true;
        }
      return false;
    };

  /* In loop analysis all data references should be vectorizable.  */
  if (!STMT_VINFO_VECTORIZABLE (stmtinfo_a)
      || !STMT_VINFO_VECTORIZABLE (stmtinfo_b))
    gcc_unreachable ();

  /* Independent data accesses.  */
  if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
    return opt_result::success ();

  if (dra == drb
      || (DR_IS_READ (dra) && DR_IS_READ (drb)))
    return opt_result::success ();

  /* We do not have to consider dependences between accesses that belong
     to the same group, unless the stride could be smaller than the
     group size.  */
  if (DR_GROUP_FIRST_ELEMENT (stmtinfo_a)
      && (DR_GROUP_FIRST_ELEMENT (stmtinfo_a)
          == DR_GROUP_FIRST_ELEMENT (stmtinfo_b))
      && !STMT_VINFO_STRIDED_P (stmtinfo_a))
    return opt_result::success ();

  /* Even if we have an anti-dependence then, as the vectorized loop covers at
     least two scalar iterations, there is always also a true dependence.
     As the vectorizer does not re-order loads and stores we can ignore
     the anti-dependence if TBAA can disambiguate both DRs similar to the
     case with known negative distance anti-dependences (positive
     distance anti-dependences would violate TBAA constraints).  */
  if (((DR_IS_READ (dra) && DR_IS_WRITE (drb))
       || (DR_IS_WRITE (dra) && DR_IS_READ (drb)))
      && !alias_sets_conflict_p (get_alias_set (DR_REF (dra)),
                                 get_alias_set (DR_REF (drb))))
    return opt_result::success ();

  if (STMT_VINFO_GATHER_SCATTER_P (stmtinfo_a)
      || STMT_VINFO_GATHER_SCATTER_P (stmtinfo_b))
    {
      if (apply_safelen ())
        return opt_result::success ();

      return opt_result::failure_at
        (stmtinfo_a->stmt,
         "possible alias involving gather/scatter between %T and %T\n",
         DR_REF (dra), DR_REF (drb));
    }

  /* Unknown data dependence.  */
  if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
    {
      if (apply_safelen ())
        return opt_result::success ();

      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, stmtinfo_a->stmt,
                         "versioning for alias required: "
                         "can't determine dependence between %T and %T\n",
                         DR_REF (dra), DR_REF (drb));

      /* Add to list of ddrs that need to be tested at run-time.  */
      return vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
    }

  /* Known data dependence.  */
  if (DDR_NUM_DIST_VECTS (ddr) == 0)
    {
      if (apply_safelen ())
        return opt_result::success ();

      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, stmtinfo_a->stmt,
                         "versioning for alias required: "
                         "bad dist vector for %T and %T\n",
                         DR_REF (dra), DR_REF (drb));
      /* Add to list of ddrs that need to be tested at run-time.  */
      return vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
    }

  loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));

  if (DDR_COULD_BE_INDEPENDENT_P (ddr)
      && vect_analyze_possibly_independent_ddr (ddr, loop_vinfo,
                                                loop_depth, max_vf))
    return opt_result::success ();

  FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
    {
      int dist = dist_v[loop_depth];

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "dependence distance = %d.\n", dist);

      if (dist == 0)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "dependence distance == 0 between %T and %T\n",
                             DR_REF (dra), DR_REF (drb));

          /* When we perform grouped accesses and perform implicit CSE
             by detecting equal accesses and doing disambiguation with
             runtime alias tests like for
                .. = a[i];
                .. = a[i+1];
                a[i] = ..;
                a[i+1] = ..;
                *p = ..;
                .. = a[i];
                .. = a[i+1];
             where we will end up loading { a[i], a[i+1] } once, make
             sure that inserting group loads before the first load and
             stores after the last store will do the right thing.
             Similar for groups like
                a[i] = ...;
                ... = a[i];
                a[i+1] = ...;
             where loads from the group interleave with the store.  */
          if (!vect_preserves_scalar_order_p (dr_info_a, dr_info_b))
            return opt_result::failure_at (stmtinfo_a->stmt,
                                           "READ_WRITE dependence"
                                           " in interleaving.\n");

          if (loop->safelen < 2)
            {
              tree indicator = dr_zero_step_indicator (dra);
              if (!indicator || integer_zerop (indicator))
                return opt_result::failure_at (stmtinfo_a->stmt,
                                               "access also has a zero step\n");
              else if (TREE_CODE (indicator) != INTEGER_CST)
                vect_check_nonzero_value (loop_vinfo, indicator);
            }
          continue;
        }

      if (dist > 0 && DDR_REVERSED_P (ddr))
        {
          /* If DDR_REVERSED_P the order of the data-refs in DDR was
             reversed (to make distance vector positive), and the actual
             distance is negative.  */
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "dependence distance negative.\n");
          /* When doing outer loop vectorization, we need to check if there is
             a backward dependence at the inner loop level if the dependence
             at the outer loop is reversed.  See PR81740.  */
          if (nested_in_vect_loop_p (loop, stmtinfo_a)
              || nested_in_vect_loop_p (loop, stmtinfo_b))
            {
              unsigned inner_depth = index_in_loop_nest (loop->inner->num,
                                                         DDR_LOOP_NEST (ddr));
              if (dist_v[inner_depth] < 0)
                return opt_result::failure_at (stmtinfo_a->stmt,
                                               "not vectorized, dependence "
                                               "between data-refs %T and %T\n",
                                               DR_REF (dra), DR_REF (drb));
            }
          /* Record a negative dependence distance to later limit the
             amount of stmt copying / unrolling we can perform.
             Only need to handle read-after-write dependence.  */
          if (DR_IS_READ (drb)
              && (STMT_VINFO_MIN_NEG_DIST (stmtinfo_b) == 0
                  || STMT_VINFO_MIN_NEG_DIST (stmtinfo_b) > (unsigned)dist))
            STMT_VINFO_MIN_NEG_DIST (stmtinfo_b) = dist;
          continue;
        }

      unsigned int abs_dist = abs (dist);
      if (abs_dist >= 2 && abs_dist < *max_vf)
        {
          /* The dependence distance requires reduction of the maximal
             vectorization factor.  */
          *max_vf = abs_dist;
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "adjusting maximal vectorization factor to %i\n",
                             *max_vf);
        }

      if (abs_dist >= *max_vf)
        {
          /* Dependence distance does not create dependence, as far as
             vectorization is concerned, in this case.  */
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "dependence distance >= VF.\n");
          continue;
        }

      return opt_result::failure_at (stmtinfo_a->stmt,
                                     "not vectorized, possible dependence "
                                     "between data-refs %T and %T\n",
                                     DR_REF (dra), DR_REF (drb));
    }

  return opt_result::success ();
}
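
/* Worked example (added for exposition): for
       for (i = 0; i < n; i++)
         a[i + 3] = a[i] + 1;
   the dependence distance is 3.  A vectorization factor of 3 is still
   safe (iterations i, i+1, i+2 read a[i..i+2] and write a[i+3..i+5]
   without overlap), so the loop above caps *max_vf at 3; with that cap
   in place the distance satisfies 'abs_dist >= *max_vf' and the
   dependence is ignored.  */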

/* Function vect_analyze_data_ref_dependences.

   Examine all the data references in the loop, and make sure there do not
   exist any data dependences between them.  Set *MAX_VF according to
   the maximum vectorization factor the data dependences allow.  */

opt_result
vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo,
                                   unsigned int *max_vf)
{
  unsigned int i;
  struct data_dependence_relation *ddr;

  DUMP_VECT_SCOPE ("vect_analyze_data_ref_dependences");

  if (!LOOP_VINFO_DDRS (loop_vinfo).exists ())
    {
      LOOP_VINFO_DDRS (loop_vinfo)
        .create (LOOP_VINFO_DATAREFS (loop_vinfo).length ()
                 * LOOP_VINFO_DATAREFS (loop_vinfo).length ());
      /* We do not need read-read dependences.  */
      bool res = compute_all_dependences (LOOP_VINFO_DATAREFS (loop_vinfo),
                                          &LOOP_VINFO_DDRS (loop_vinfo),
                                          LOOP_VINFO_LOOP_NEST (loop_vinfo),
                                          false);
      gcc_assert (res);
    }

  LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo) = true;

  /* For epilogues we either have no aliases or alias versioning
     was applied to the original loop.  Therefore we may just get max_vf
     using the VF of the original loop.  */
  if (LOOP_VINFO_EPILOGUE_P (loop_vinfo))
    *max_vf = LOOP_VINFO_ORIG_MAX_VECT_FACTOR (loop_vinfo);
  else
    FOR_EACH_VEC_ELT (LOOP_VINFO_DDRS (loop_vinfo), i, ddr)
      {
        opt_result res
          = vect_analyze_data_ref_dependence (ddr, loop_vinfo, max_vf);
        if (!res)
          return res;
      }

  return opt_result::success ();
}


/* Function vect_slp_analyze_data_ref_dependence.

   Return TRUE if there (might) exist a dependence between the memory
   references DRA and DRB for VINFO that would prevent vectorization;
   return FALSE if the accesses are known to be independent.  */

static bool
vect_slp_analyze_data_ref_dependence (vec_info *vinfo,
                                      struct data_dependence_relation *ddr)
{
  struct data_reference *dra = DDR_A (ddr);
  struct data_reference *drb = DDR_B (ddr);
  dr_vec_info *dr_info_a = vinfo->lookup_dr (dra);
  dr_vec_info *dr_info_b = vinfo->lookup_dr (drb);

  /* We need to check dependences of statements marked as unvectorizable
     as well, they still can prohibit vectorization.  */

  /* Independent data accesses.  */
  if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
    return false;

  if (dra == drb)
    return false;

  /* Read-read is OK.  */
  if (DR_IS_READ (dra) && DR_IS_READ (drb))
    return false;

  /* If dra and drb are part of the same interleaving chain consider
     them independent.  */
  if (STMT_VINFO_GROUPED_ACCESS (dr_info_a->stmt)
      && (DR_GROUP_FIRST_ELEMENT (dr_info_a->stmt)
          == DR_GROUP_FIRST_ELEMENT (dr_info_b->stmt)))
    return false;

  /* Unknown data dependence.  */
  if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "can't determine dependence between %T and %T\n",
                         DR_REF (dra), DR_REF (drb));
    }
  else if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "determined dependence between %T and %T\n",
                     DR_REF (dra), DR_REF (drb));

  return true;
}
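
/* Illustrative example (added for exposition): for a basic-block SLP
   store group
       a[0] = x;  a[1] = y;
   both data references share the same DR_GROUP_FIRST_ELEMENT and are
   treated as independent above, while a write through an unrelated
   pointer with an unanalyzable dependence makes this function return
   true and the SLP instance is given up on.  */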


/* Analyze dependences involved in the transform of SLP NODE.  STORES
   contain the vector of scalar stores of this instance if we are
   disambiguating the loads.  */

static bool
vect_slp_analyze_node_dependences (vec_info *vinfo, slp_tree node,
                                   vec<stmt_vec_info> stores,
                                   stmt_vec_info last_store_info)
{
  /* This walks over all stmts involved in the SLP load/store done
     in NODE verifying we can sink them up to the last stmt in the
     group.  */
  if (DR_IS_WRITE (STMT_VINFO_DATA_REF (SLP_TREE_REPRESENTATIVE (node))))
    {
      stmt_vec_info last_access_info = vect_find_last_scalar_stmt_in_slp (node);
      for (unsigned k = 0; k < SLP_TREE_SCALAR_STMTS (node).length (); ++k)
        {
          stmt_vec_info access_info
            = vect_orig_stmt (SLP_TREE_SCALAR_STMTS (node)[k]);
          if (access_info == last_access_info)
            continue;
          data_reference *dr_a = STMT_VINFO_DATA_REF (access_info);
          ao_ref ref;
          bool ref_initialized_p = false;
          for (gimple_stmt_iterator gsi = gsi_for_stmt (access_info->stmt);
               gsi_stmt (gsi) != last_access_info->stmt; gsi_next (&gsi))
            {
              gimple *stmt = gsi_stmt (gsi);
              if (! gimple_vuse (stmt))
                continue;

              /* If we couldn't record a (single) data reference for this
                 stmt we have to resort to the alias oracle.  */
              stmt_vec_info stmt_info = vinfo->lookup_stmt (stmt);
              data_reference *dr_b = STMT_VINFO_DATA_REF (stmt_info);
              if (!dr_b)
                {
                  /* We are moving a store - this means
                     we cannot use TBAA for disambiguation.  */
                  if (!ref_initialized_p)
                    ao_ref_init (&ref, DR_REF (dr_a));
                  if (stmt_may_clobber_ref_p_1 (stmt, &ref, false)
                      || ref_maybe_used_by_stmt_p (stmt, &ref, false))
                    return false;
                  continue;
                }

              bool dependent = false;
              /* If we run into a store of this same instance (we've just
                 marked those) then delay dependence checking until we run
                 into the last store because this is where it will have
                 been sunk to (and we verify if we can do that as well).  */
              if (gimple_visited_p (stmt))
                {
                  if (stmt_info != last_store_info)
                    continue;

                  for (stmt_vec_info &store_info : stores)
                    {
                      data_reference *store_dr
                        = STMT_VINFO_DATA_REF (store_info);
                      ddr_p ddr = initialize_data_dependence_relation
                                    (dr_a, store_dr, vNULL);
                      dependent
                        = vect_slp_analyze_data_ref_dependence (vinfo, ddr);
                      free_dependence_relation (ddr);
                      if (dependent)
                        break;
                    }
                }
              else
                {
                  ddr_p ddr = initialize_data_dependence_relation (dr_a,
                                                                   dr_b, vNULL);
                  dependent = vect_slp_analyze_data_ref_dependence (vinfo, ddr);
                  free_dependence_relation (ddr);
                }
              if (dependent)
                return false;
            }
        }
    }
  else /* DR_IS_READ */
    {
      stmt_vec_info first_access_info
        = vect_find_first_scalar_stmt_in_slp (node);
      for (unsigned k = 0; k < SLP_TREE_SCALAR_STMTS (node).length (); ++k)
        {
          stmt_vec_info access_info
            = vect_orig_stmt (SLP_TREE_SCALAR_STMTS (node)[k]);
          if (access_info == first_access_info)
            continue;
          data_reference *dr_a = STMT_VINFO_DATA_REF (access_info);
          ao_ref ref;
          bool ref_initialized_p = false;
          for (gimple_stmt_iterator gsi = gsi_for_stmt (access_info->stmt);
               gsi_stmt (gsi) != first_access_info->stmt; gsi_prev (&gsi))
            {
              gimple *stmt = gsi_stmt (gsi);
              if (! gimple_vdef (stmt))
                continue;

              /* If we couldn't record a (single) data reference for this
                 stmt we have to resort to the alias oracle.  */
              stmt_vec_info stmt_info = vinfo->lookup_stmt (stmt);
              data_reference *dr_b = STMT_VINFO_DATA_REF (stmt_info);

              /* We are hoisting a load - this means we can use
                 TBAA for disambiguation.  */
              if (!ref_initialized_p)
                ao_ref_init (&ref, DR_REF (dr_a));
              if (stmt_may_clobber_ref_p_1 (stmt, &ref, true))
                {
                  if (!dr_b)
                    return false;
                  /* Resort to dependence checking below.  */
                }
              else
                /* No dependence.  */
                continue;

              bool dependent = false;
              /* If we run into a store of this same instance (we've just
                 marked those) then delay dependence checking until we run
                 into the last store because this is where it will have
                 been sunk to (and we verify if we can do that as well).  */
              if (gimple_visited_p (stmt))
                {
                  if (stmt_info != last_store_info)
                    continue;

                  for (stmt_vec_info &store_info : stores)
                    {
                      data_reference *store_dr
                        = STMT_VINFO_DATA_REF (store_info);
                      ddr_p ddr = initialize_data_dependence_relation
                                    (dr_a, store_dr, vNULL);
                      dependent
                        = vect_slp_analyze_data_ref_dependence (vinfo, ddr);
                      free_dependence_relation (ddr);
                      if (dependent)
                        break;
                    }
                }
              else
                {
                  ddr_p ddr = initialize_data_dependence_relation (dr_a,
                                                                   dr_b, vNULL);
                  dependent = vect_slp_analyze_data_ref_dependence (vinfo, ddr);
                  free_dependence_relation (ddr);
                }
              if (dependent)
                return false;
            }
        }
    }
  return true;
}
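
/* Illustrative walk (added for exposition): for a store group
       a[0] = x;  *p = 1;  a[1] = y;
   sinking the vectorized store of { a[0], a[1] } to the position of
   'a[1] = y' is valid only if '*p = 1' neither clobbers nor reads
   a[0]; the gsi walk above checks exactly that, using recorded data
   references when available and the alias oracle otherwise.  */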


/* Function vect_slp_analyze_instance_dependence.

   Examine all the data references in the SLP instance INSTANCE, and make
   sure there do not exist any data dependences between them that would
   prevent the instance from being vectorized.  */

bool
vect_slp_analyze_instance_dependence (vec_info *vinfo, slp_instance instance)
{
  DUMP_VECT_SCOPE ("vect_slp_analyze_instance_dependence");

  /* The stores of this instance are at the root of the SLP tree.  */
  slp_tree store = NULL;
  if (SLP_INSTANCE_KIND (instance) == slp_inst_kind_store)
    store = SLP_INSTANCE_TREE (instance);

  /* Verify we can sink stores to the vectorized stmt insert location.  */
  stmt_vec_info last_store_info = NULL;
  if (store)
    {
      if (! vect_slp_analyze_node_dependences (vinfo, store, vNULL, NULL))
        return false;

      /* Mark stores in this instance and remember the last one.  */
      last_store_info = vect_find_last_scalar_stmt_in_slp (store);
      for (unsigned k = 0; k < SLP_TREE_SCALAR_STMTS (store).length (); ++k)
        gimple_set_visited (SLP_TREE_SCALAR_STMTS (store)[k]->stmt, true);
    }

  bool res = true;

  /* Verify we can sink loads to the vectorized stmt insert location,
     special-casing stores of this instance.  */
  for (slp_tree &load : SLP_INSTANCE_LOADS (instance))
    if (! vect_slp_analyze_node_dependences (vinfo, load,
                                             store
                                             ? SLP_TREE_SCALAR_STMTS (store)
                                             : vNULL, last_store_info))
      {
        res = false;
        break;
      }

  /* Unset the visited flag.  */
  if (store)
    for (unsigned k = 0; k < SLP_TREE_SCALAR_STMTS (store).length (); ++k)
      gimple_set_visited (SLP_TREE_SCALAR_STMTS (store)[k]->stmt, false);

  return res;
}

/* Return the misalignment of DR_INFO accessed in VECTYPE with OFFSET
   applied.  */

int
dr_misalignment (dr_vec_info *dr_info, tree vectype, poly_int64 offset)
{
  HOST_WIDE_INT diff = 0;
  /* Alignment is only analyzed for the first element of a DR group,
     use that but adjust misalignment by the offset of the access.  */
  if (STMT_VINFO_GROUPED_ACCESS (dr_info->stmt))
    {
      dr_vec_info *first_dr
        = STMT_VINFO_DR_INFO (DR_GROUP_FIRST_ELEMENT (dr_info->stmt));
      /* vect_analyze_data_ref_accesses guarantees that DR_INIT are
         INTEGER_CSTs and the first element in the group has the lowest
         address.  */
      diff = (TREE_INT_CST_LOW (DR_INIT (dr_info->dr))
              - TREE_INT_CST_LOW (DR_INIT (first_dr->dr)));
      gcc_assert (diff >= 0);
      dr_info = first_dr;
    }

  int misalign = dr_info->misalignment;
  gcc_assert (misalign != DR_MISALIGNMENT_UNINITIALIZED);
  if (misalign == DR_MISALIGNMENT_UNKNOWN)
    return misalign;

  /* If the access is only aligned for a vector type with smaller alignment
     requirement the access has unknown misalignment.  */
  if (maybe_lt (dr_info->target_alignment * BITS_PER_UNIT,
                targetm.vectorize.preferred_vector_alignment (vectype)))
    return DR_MISALIGNMENT_UNKNOWN;

  /* Apply the offset from the DR group start and the externally supplied
     offset which can for example result from a negative stride access.  */
  poly_int64 misalignment = misalign + diff + offset;

  /* vect_compute_data_ref_alignment will have ensured that target_alignment
     is constant and otherwise set misalign to DR_MISALIGNMENT_UNKNOWN.  */
  unsigned HOST_WIDE_INT target_alignment_c
    = dr_info->target_alignment.to_constant ();
  if (!known_misalignment (misalignment, target_alignment_c, &misalign))
    return DR_MISALIGNMENT_UNKNOWN;
  return misalign;
}
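
/* Worked example (added for exposition): for the second element of a
   group whose first element has misalignment 4 against a 16-byte
   target alignment, with a DR_INIT difference of 8 bytes and OFFSET 0,
   the result is (4 + 8 + 0) % 16 == 12.  */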

/* Record the base alignment guarantee given by DRB, which occurs
   in STMT_INFO.  */

static void
vect_record_base_alignment (vec_info *vinfo, stmt_vec_info stmt_info,
                            innermost_loop_behavior *drb)
{
  bool existed;
  std::pair<stmt_vec_info, innermost_loop_behavior *> &entry
    = vinfo->base_alignments.get_or_insert (drb->base_address, &existed);
  if (!existed || entry.second->base_alignment < drb->base_alignment)
    {
      entry = std::make_pair (stmt_info, drb);
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "recording new base alignment for %T\n"
                         "  alignment:    %d\n"
                         "  misalignment: %d\n"
                         "  based on:     %G",
                         drb->base_address,
                         drb->base_alignment,
                         drb->base_misalignment,
                         stmt_info->stmt);
    }
}

/* If the region we're going to vectorize is reached, all unconditional
   data references occur at least once.  We can therefore pool the base
   alignment guarantees from each unconditional reference.  Do this by
   going through all the data references in VINFO and checking whether
   the containing statement makes the reference unconditionally.  If so,
   record the alignment of the base address in VINFO so that it can be
   used for all other references with the same base.  */

void
vect_record_base_alignments (vec_info *vinfo)
{
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
  class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
  for (data_reference *dr : vinfo->shared->datarefs)
    {
      dr_vec_info *dr_info = vinfo->lookup_dr (dr);
      stmt_vec_info stmt_info = dr_info->stmt;
      if (!DR_IS_CONDITIONAL_IN_STMT (dr)
          && STMT_VINFO_VECTORIZABLE (stmt_info)
          && !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
        {
          vect_record_base_alignment (vinfo, stmt_info, &DR_INNERMOST (dr));

          /* If DR is nested in the loop that is being vectorized, we can also
             record the alignment of the base wrt the outer loop.  */
          if (loop && nested_in_vect_loop_p (loop, stmt_info))
            vect_record_base_alignment
              (vinfo, stmt_info, &STMT_VINFO_DR_WRT_VEC_LOOP (stmt_info));
        }
    }
}
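
/* Illustrative example (added for exposition): if an unconditional
   reference guarantees that base 'p' is 32-byte aligned, a second,
   conditionally executed reference based on the same 'p' can reuse
   that pooled guarantee even though its own analysis would only
   derive a weaker alignment.  */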
989 | |||||
/* Function vect_compute_data_ref_alignment

   Compute the misalignment of the data reference DR_INFO when vectorizing
   with VECTYPE.

   Output:
   1. initialized misalignment info for DR_INFO

   FOR NOW: No analysis is actually performed.  Misalignment is calculated
   only for trivial cases.  TODO.  */

static void
vect_compute_data_ref_alignment (vec_info *vinfo, dr_vec_info *dr_info,
				 tree vectype)
{
  stmt_vec_info stmt_info = dr_info->stmt;
  vec_base_alignments *base_alignments = &vinfo->base_alignments;
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
  class loop *loop = NULL;
  tree ref = DR_REF (dr_info->dr);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_compute_data_ref_alignment:\n");

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  /* Initialize misalignment to unknown.  */
  SET_DR_MISALIGNMENT (dr_info, DR_MISALIGNMENT_UNKNOWN);

  if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    return;

  innermost_loop_behavior *drb = vect_dr_behavior (vinfo, dr_info);
  bool step_preserves_misalignment_p;

  poly_uint64 vector_alignment
    = exact_div (targetm.vectorize.preferred_vector_alignment (vectype),
		 BITS_PER_UNIT);
  SET_DR_TARGET_ALIGNMENT (dr_info, vector_alignment);

  /* If the main loop has peeled for alignment we have no way of knowing
     whether the data accesses in the epilogues are aligned.  We can't at
     compile time answer the question whether we have entered the main loop or
     not.  Fixes PR 92351.  */
  if (loop_vinfo)
    {
      loop_vec_info orig_loop_vinfo = LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo);
      if (orig_loop_vinfo
	  && LOOP_VINFO_PEELING_FOR_ALIGNMENT (orig_loop_vinfo) != 0)
	return;
    }

  unsigned HOST_WIDE_INT vect_align_c;
  if (!vector_alignment.is_constant (&vect_align_c))
    return;

  /* No step for BB vectorization.  */
  if (!loop)
    {
      gcc_assert (integer_zerop (drb->step));
      step_preserves_misalignment_p = true;
    }

  /* In case the dataref is in an inner-loop of the loop that is being
     vectorized (LOOP), we use the base and misalignment information
     relative to the outer-loop (LOOP).  This is ok only if the misalignment
     stays the same throughout the execution of the inner-loop, which is why
     we have to check that the stride of the dataref in the inner-loop evenly
     divides by the vector alignment.  */
  else if (nested_in_vect_loop_p (loop, stmt_info))
    {
      step_preserves_misalignment_p
	= (DR_STEP_ALIGNMENT (dr_info->dr) % vect_align_c) == 0;

      if (dump_enabled_p ())
	{
	  if (step_preserves_misalignment_p)
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "inner step divides the vector alignment.\n");
	  else
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "inner step doesn't divide the vector"
			     " alignment.\n");
	}
    }

  /* Similarly we can only use base and misalignment information relative to
     an innermost loop if the misalignment stays the same throughout the
     execution of the loop.  As above, this is the case if the stride of
     the dataref evenly divides by the alignment.  */
  else
    {
      poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
      step_preserves_misalignment_p
	= multiple_p (DR_STEP_ALIGNMENT (dr_info->dr) * vf, vect_align_c);

      if (!step_preserves_misalignment_p && dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "step doesn't divide the vector alignment.\n");
    }

  unsigned int base_alignment = drb->base_alignment;
  unsigned int base_misalignment = drb->base_misalignment;

  /* Calculate the maximum of the pooled base address alignment and the
     alignment that we can compute for DR itself.  */
  std::pair<stmt_vec_info, innermost_loop_behavior *> *entry
    = base_alignments->get (drb->base_address);
  if (entry
      && base_alignment < (*entry).second->base_alignment
      && (loop_vinfo
	  || (dominated_by_p (CDI_DOMINATORS, gimple_bb (stmt_info->stmt),
			      gimple_bb (entry->first->stmt))
	      && (gimple_bb (stmt_info->stmt) != gimple_bb (entry->first->stmt)
		  || (entry->first->dr_aux.group <= dr_info->group)))))
    {
      base_alignment = entry->second->base_alignment;
      base_misalignment = entry->second->base_misalignment;
    }

  if (drb->offset_alignment < vect_align_c
      || !step_preserves_misalignment_p
      /* We need to know whether the step wrt the vectorized loop is
	 negative when computing the starting misalignment below.  */
      || TREE_CODE (drb->step) != INTEGER_CST)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "Unknown alignment for access: %T\n", ref);
      return;
    }

  if (base_alignment < vect_align_c)
    {
      unsigned int max_alignment;
      tree base = get_base_for_alignment (drb->base_address, &max_alignment);
      if (max_alignment < vect_align_c
	  || !vect_can_force_dr_alignment_p (base,
					     vect_align_c * BITS_PER_UNIT))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "can't force alignment of ref: %T\n", ref);
	  return;
	}

      /* Force the alignment of the decl.
	 NOTE: This is the only change to the code we make during
	 the analysis phase, before deciding to vectorize the loop.  */
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "force alignment of %T\n", ref);

      dr_info->base_decl = base;
      dr_info->base_misaligned = true;
      base_misalignment = 0;
    }
  poly_int64 misalignment
    = base_misalignment + wi::to_poly_offset (drb->init).force_shwi ();

  unsigned int const_misalignment;
  if (!known_misalignment (misalignment, vect_align_c, &const_misalignment))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "Non-constant misalignment for access: %T\n", ref);
      return;
    }

  SET_DR_MISALIGNMENT (dr_info, const_misalignment);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
		     "misalign = %d bytes of ref %T\n",
		     const_misalignment, ref);

  return;
}

/* Return whether DR_INFO, which is related to DR_PEEL_INFO in
   that it only differs in DR_INIT, is aligned if DR_PEEL_INFO
   is made aligned via peeling.  */

static bool
vect_dr_aligned_if_related_peeled_dr_is (dr_vec_info *dr_info,
					 dr_vec_info *dr_peel_info)
{
  if (multiple_p (DR_TARGET_ALIGNMENT (dr_peel_info),
		  DR_TARGET_ALIGNMENT (dr_info)))
    {
      poly_offset_int diff
	= (wi::to_poly_offset (DR_INIT (dr_peel_info->dr))
	   - wi::to_poly_offset (DR_INIT (dr_info->dr)));
      if (known_eq (diff, 0)
	  || multiple_p (diff, DR_TARGET_ALIGNMENT (dr_info)))
	return true;
    }
  return false;
}
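
/* An illustrative example with assumed numbers (not from any particular
   target): if DR_PEEL_INFO is a ref to a[i] with DR_INIT 0 and DR_INFO is
   a ref to a[i + 4] of 4-byte elements with DR_INIT 16, both with a
   16-byte DR_TARGET_ALIGNMENT, then diff is 16, a multiple of 16, so
   peeling a[i] to a 16-byte boundary necessarily aligns a[i + 4] as well.
   A diff of 8 would not be such a multiple and we would return false.  */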

/* Return whether DR_INFO is aligned if DR_PEEL_INFO is made
   aligned via peeling.  */

static bool
vect_dr_aligned_if_peeled_dr_is (dr_vec_info *dr_info,
				 dr_vec_info *dr_peel_info)
{
  if (!operand_equal_p (DR_BASE_ADDRESS (dr_info->dr),
			DR_BASE_ADDRESS (dr_peel_info->dr), 0)
      || !operand_equal_p (DR_OFFSET (dr_info->dr),
			   DR_OFFSET (dr_peel_info->dr), 0)
      || !operand_equal_p (DR_STEP (dr_info->dr),
			   DR_STEP (dr_peel_info->dr), 0))
    return false;

  return vect_dr_aligned_if_related_peeled_dr_is (dr_info, dr_peel_info);
}

/* Compute the value for dr_info->misalign so that the access appears
   aligned.  This is used by peeling to compensate for dr_misalignment
   applying the offset for negative step.  */

int
vect_dr_misalign_for_aligned_access (dr_vec_info *dr_info)
{
  if (tree_int_cst_sgn (DR_STEP (dr_info->dr)) >= 0)
    return 0;

  tree vectype = STMT_VINFO_VECTYPE (dr_info->stmt);
  poly_int64 misalignment
    = ((TYPE_VECTOR_SUBPARTS (vectype) - 1)
       * TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype))));

  unsigned HOST_WIDE_INT target_alignment_c;
  int misalign;
  if (!dr_info->target_alignment.is_constant (&target_alignment_c)
      || !known_misalignment (misalignment, target_alignment_c, &misalign))
    return DR_MISALIGNMENT_UNKNOWN;
  return misalign;
}
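
/* Illustrative numbers (a hypothetical V4SI access with a 16-byte target
   alignment): a negative-step access is offset back by
   (4 - 1) * 4 = 12 bytes, so the misalignment value that makes the access
   appear aligned is 12, and that is what we return here.  */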

/* Function vect_update_misalignment_for_peel.
   Sets DR_INFO's misalignment
   - to 0 if it has the same alignment as DR_PEEL_INFO,
   - to the misalignment computed using NPEEL if DR_INFO's misalignment
     is known,
   - to -1 (unknown) otherwise.

   DR_INFO - the data reference whose misalignment is to be adjusted.
   DR_PEEL_INFO - the data reference whose misalignment is being made
		  zero in the vector loop by the peel.
   NPEEL - the number of iterations in the peel loop if the misalignment
	   of DR_PEEL_INFO is known at compile time.  */

static void
vect_update_misalignment_for_peel (dr_vec_info *dr_info,
				   dr_vec_info *dr_peel_info, int npeel)
{
  /* If dr_info is aligned whenever dr_peel_info is, then mark it so.  */
  if (vect_dr_aligned_if_peeled_dr_is (dr_info, dr_peel_info))
    {
      SET_DR_MISALIGNMENT (dr_info,
			   vect_dr_misalign_for_aligned_access (dr_peel_info));
      return;
    }

  unsigned HOST_WIDE_INT alignment;
  if (DR_TARGET_ALIGNMENT (dr_info).is_constant (&alignment)
      && known_alignment_for_access_p (dr_info,
				       STMT_VINFO_VECTYPE (dr_info->stmt))
      && known_alignment_for_access_p (dr_peel_info,
				       STMT_VINFO_VECTYPE (dr_peel_info->stmt)))
    {
      int misal = dr_info->misalignment;
      misal += npeel * TREE_INT_CST_LOW (DR_STEP (dr_info->dr));
      misal &= alignment - 1;
      set_dr_misalignment (dr_info, misal);
      return;
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "Setting misalignment "
		     "to unknown (-1).\n");
  SET_DR_MISALIGNMENT (dr_info, DR_MISALIGNMENT_UNKNOWN);
}
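
/* A sketch of the update arithmetic above, with assumed numbers: if
   DR_INFO's misalignment was 4, DR_STEP is 4 bytes, the peel removes
   npeel = 3 iterations and the target alignment is 16, the new
   misalignment is (4 + 3 * 4) & 15 = 0.  */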

/* Return true if alignment is relevant for DR_INFO.  */

static bool
vect_relevant_for_alignment_p (dr_vec_info *dr_info)
{
  stmt_vec_info stmt_info = dr_info->stmt;

  if (!STMT_VINFO_RELEVANT_P (stmt_info))
    return false;

  /* For interleaving, only the alignment of the first access matters.  */
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
      && DR_GROUP_FIRST_ELEMENT (stmt_info) != stmt_info)
    return false;

  /* Scatter-gather and invariant accesses continue to address individual
     scalars, so vector-level alignment is irrelevant.  */
  if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)
      || integer_zerop (DR_STEP (dr_info->dr)))
    return false;

  /* Strided accesses perform only component accesses, alignment is
     irrelevant for them.  */
  if (STMT_VINFO_STRIDED_P (stmt_info)
      && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
    return false;

  return true;
}

/* Given a memory reference EXP return whether its alignment is less
   than its size.  */

static bool
not_size_aligned (tree exp)
{
  if (!tree_fits_uhwi_p (TYPE_SIZE (TREE_TYPE (exp))))
    return true;

  return (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (exp)))
	  > get_object_alignment (exp));
}
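
/* For example, a 4-byte integer field of a packed structure may have a
   get_object_alignment of only one byte; its size (32 bits) then exceeds
   its alignment (8 bits), and the reference counts as "packed" for the
   purposes of vector_alignment_reachable_p below.  */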

/* Function vector_alignment_reachable_p

   Return true if vector alignment for DR_INFO is reachable by peeling
   a few loop iterations.  Return false otherwise.  */

static bool
vector_alignment_reachable_p (dr_vec_info *dr_info)
{
  stmt_vec_info stmt_info = dr_info->stmt;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);

  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      /* For interleaved access we peel only if the number of iterations
	 in the prolog loop ({VF - misalignment}) is a multiple of the
	 number of the interleaved accesses.  */
      int elem_size, mis_in_elements;

      /* FORNOW: handle only known alignment.  */
      if (!known_alignment_for_access_p (dr_info, vectype))
	return false;

      poly_uint64 nelements = TYPE_VECTOR_SUBPARTS (vectype);
      poly_uint64 vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
      elem_size = vector_element_size (vector_size, nelements);
      mis_in_elements = dr_misalignment (dr_info, vectype) / elem_size;

      if (!multiple_p (nelements - mis_in_elements, DR_GROUP_SIZE (stmt_info)))
	return false;
    }

  /* If misalignment is known at compile time then allow peeling
     only if natural alignment is reachable through peeling.  */
  if (known_alignment_for_access_p (dr_info, vectype)
      && !aligned_access_p (dr_info, vectype))
    {
      HOST_WIDE_INT elmsize =
	int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "data size = %wd. misalignment = %d.\n", elmsize,
			   dr_misalignment (dr_info, vectype));
	}
      if (dr_misalignment (dr_info, vectype) % elmsize)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "data size does not divide the misalignment.\n");
	  return false;
	}
    }

  if (!known_alignment_for_access_p (dr_info, vectype))
    {
      tree type = TREE_TYPE (DR_REF (dr_info->dr));
      bool is_packed = not_size_aligned (DR_REF (dr_info->dr));
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "Unknown misalignment, %snaturally aligned\n",
			 is_packed ? "not " : "");
      return targetm.vectorize.vector_alignment_reachable (type, is_packed);
    }

  return true;
}


/* Calculate the cost of the memory access represented by DR_INFO.  */

static void
vect_get_data_access_cost (vec_info *vinfo, dr_vec_info *dr_info,
			   dr_alignment_support alignment_support_scheme,
			   int misalignment,
			   unsigned int *inside_cost,
			   unsigned int *outside_cost,
			   stmt_vector_for_cost *body_cost_vec,
			   stmt_vector_for_cost *prologue_cost_vec)
{
  stmt_vec_info stmt_info = dr_info->stmt;
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
  int ncopies;

  if (PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = vect_get_num_copies (loop_vinfo, STMT_VINFO_VECTYPE (stmt_info));

  if (DR_IS_READ (dr_info->dr))
    vect_get_load_cost (vinfo, stmt_info, ncopies, alignment_support_scheme,
			misalignment, true, inside_cost,
			outside_cost, prologue_cost_vec, body_cost_vec, false);
  else
    vect_get_store_cost (vinfo, stmt_info, ncopies, alignment_support_scheme,
			 misalignment, inside_cost, body_cost_vec);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_get_data_access_cost: inside_cost = %d, "
		     "outside_cost = %d.\n", *inside_cost, *outside_cost);
}


typedef struct _vect_peel_info
{
  dr_vec_info *dr_info;
  int npeel;
  unsigned int count;
} *vect_peel_info;

typedef struct _vect_peel_extended_info
{
  vec_info *vinfo;
  struct _vect_peel_info peel_info;
  unsigned int inside_cost;
  unsigned int outside_cost;
} *vect_peel_extended_info;


/* Peeling hashtable helpers.  */

struct peel_info_hasher : free_ptr_hash <_vect_peel_info>
{
  static inline hashval_t hash (const _vect_peel_info *);
  static inline bool equal (const _vect_peel_info *, const _vect_peel_info *);
};

inline hashval_t
peel_info_hasher::hash (const _vect_peel_info *peel_info)
{
  return (hashval_t) peel_info->npeel;
}

inline bool
peel_info_hasher::equal (const _vect_peel_info *a, const _vect_peel_info *b)
{
  return (a->npeel == b->npeel);
}


/* Insert DR_INFO into peeling hash table with NPEEL as key.  */

static void
vect_peeling_hash_insert (hash_table<peel_info_hasher> *peeling_htab,
			  loop_vec_info loop_vinfo, dr_vec_info *dr_info,
			  int npeel, bool supportable_if_not_aligned)
{
  struct _vect_peel_info elem, *slot;
  _vect_peel_info **new_slot;

  elem.npeel = npeel;
  slot = peeling_htab->find (&elem);
  if (slot)
    slot->count++;
  else
    {
      slot = XNEW (struct _vect_peel_info);
      slot->npeel = npeel;
      slot->dr_info = dr_info;
      slot->count = 1;
      new_slot = peeling_htab->find_slot (slot, INSERT);
      *new_slot = slot;
    }

  /* If this DR is not supported with unknown misalignment then bias
     this slot when the cost model is disabled.  */
  if (!supportable_if_not_aligned
      && unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo)))
    slot->count += VECT_MAX_COST;
}
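
/* The table is keyed solely by NPEEL: inserting the same peeling amount
   for several DRs just increments COUNT.  E.g., three refs that would all
   become aligned by peeling 2 iterations end up as one slot with
   npeel == 2 and count == 3, with COUNT further biased by VECT_MAX_COST
   on each insertion of a ref that is unsupportable when misaligned while
   the cost model is disabled.  */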


/* Traverse peeling hash table to find peeling option that aligns maximum
   number of data accesses.  */

int
vect_peeling_hash_get_most_frequent (_vect_peel_info **slot,
				     _vect_peel_extended_info *max)
{
  vect_peel_info elem = *slot;

  if (elem->count > max->peel_info.count
      || (elem->count == max->peel_info.count
	  && max->peel_info.npeel > elem->npeel))
    {
      max->peel_info.npeel = elem->npeel;
      max->peel_info.count = elem->count;
      max->peel_info.dr_info = elem->dr_info;
    }

  return 1;
}

/* Get the costs of peeling NPEEL iterations for LOOP_VINFO, checking
   data access costs for all data refs.  If UNKNOWN_MISALIGNMENT is true,
   npeel is computed at runtime but DR0_INFO's misalignment will be zero
   after peeling.  */

static void
vect_get_peeling_costs_all_drs (loop_vec_info loop_vinfo,
				dr_vec_info *dr0_info,
				unsigned int *inside_cost,
				unsigned int *outside_cost,
				stmt_vector_for_cost *body_cost_vec,
				stmt_vector_for_cost *prologue_cost_vec,
				unsigned int npeel)
{
  vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);

  bool dr0_alignment_known_p
    = (dr0_info
       && known_alignment_for_access_p (dr0_info,
					STMT_VINFO_VECTYPE (dr0_info->stmt)));

  for (data_reference *dr : datarefs)
    {
      dr_vec_info *dr_info = loop_vinfo->lookup_dr (dr);
      if (!vect_relevant_for_alignment_p (dr_info))
	continue;

      tree vectype = STMT_VINFO_VECTYPE (dr_info->stmt);
      dr_alignment_support alignment_support_scheme;
      int misalignment;
      unsigned HOST_WIDE_INT alignment;

      bool negative = tree_int_cst_compare (DR_STEP (dr_info->dr),
					    size_zero_node) < 0;
      poly_int64 off = 0;
      if (negative)
	off = ((TYPE_VECTOR_SUBPARTS (vectype) - 1)
	       * -TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype))));

      if (npeel == 0)
	misalignment = dr_misalignment (dr_info, vectype, off);
      else if (dr_info == dr0_info
	       || vect_dr_aligned_if_peeled_dr_is (dr_info, dr0_info))
	misalignment = 0;
      else if (!dr0_alignment_known_p
	       || !known_alignment_for_access_p (dr_info, vectype)
	       || !DR_TARGET_ALIGNMENT (dr_info).is_constant (&alignment))
	misalignment = DR_MISALIGNMENT_UNKNOWN;
      else
	{
	  misalignment = dr_misalignment (dr_info, vectype, off);
	  misalignment += npeel * TREE_INT_CST_LOW (DR_STEP (dr_info->dr));
	  misalignment &= alignment - 1;
	}
      alignment_support_scheme
	= vect_supportable_dr_alignment (loop_vinfo, dr_info, vectype,
					 misalignment);

      vect_get_data_access_cost (loop_vinfo, dr_info,
				 alignment_support_scheme, misalignment,
				 inside_cost, outside_cost,
				 body_cost_vec, prologue_cost_vec);
    }
}

/* Traverse peeling hash table and calculate cost for each peeling option.
   Find the one with the lowest cost.  */

int
vect_peeling_hash_get_lowest_cost (_vect_peel_info **slot,
				   _vect_peel_extended_info *min)
{
  vect_peel_info elem = *slot;
  int dummy;
  unsigned int inside_cost = 0, outside_cost = 0;
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (min->vinfo);
  stmt_vector_for_cost prologue_cost_vec, body_cost_vec,
		       epilogue_cost_vec;

  prologue_cost_vec.create (2);
  body_cost_vec.create (2);
  epilogue_cost_vec.create (2);

  vect_get_peeling_costs_all_drs (loop_vinfo, elem->dr_info, &inside_cost,
				  &outside_cost, &body_cost_vec,
				  &prologue_cost_vec, elem->npeel);

  body_cost_vec.release ();

  outside_cost += vect_get_known_peeling_cost
    (loop_vinfo, elem->npeel, &dummy,
     &LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo),
     &prologue_cost_vec, &epilogue_cost_vec);

  /* Prologue and epilogue costs are added to the target model later.
     These costs depend only on the scalar iteration cost, the
     number of peeling iterations finally chosen, and the number of
     misaligned statements.  So discard the information found here.  */
  prologue_cost_vec.release ();
  epilogue_cost_vec.release ();

  if (inside_cost < min->inside_cost
      || (inside_cost == min->inside_cost
	  && outside_cost < min->outside_cost))
    {
      min->inside_cost = inside_cost;
      min->outside_cost = outside_cost;
      min->peel_info.dr_info = elem->dr_info;
      min->peel_info.npeel = elem->npeel;
      min->peel_info.count = elem->count;
    }

  return 1;
}


/* Choose best peeling option by traversing peeling hash table and either
   choosing an option with the lowest cost (if cost model is enabled) or the
   option that aligns as many accesses as possible.  */

static struct _vect_peel_extended_info
vect_peeling_hash_choose_best_peeling (hash_table<peel_info_hasher> *peeling_htab,
				       loop_vec_info loop_vinfo)
{
  struct _vect_peel_extended_info res;

  res.peel_info.dr_info = NULL;
  res.vinfo = loop_vinfo;

  if (!unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo)))
    {
      res.inside_cost = INT_MAX;
      res.outside_cost = INT_MAX;
      peeling_htab->traverse <_vect_peel_extended_info *,
			      vect_peeling_hash_get_lowest_cost> (&res);
    }
  else
    {
      res.peel_info.count = 0;
      peeling_htab->traverse <_vect_peel_extended_info *,
			      vect_peeling_hash_get_most_frequent> (&res);
      res.inside_cost = 0;
      res.outside_cost = 0;
    }

  return res;
}

/* Return true if the new peeling NPEEL is supported.  */

static bool
vect_peeling_supportable (loop_vec_info loop_vinfo, dr_vec_info *dr0_info,
			  unsigned npeel)
{
  vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  enum dr_alignment_support supportable_dr_alignment;

  bool dr0_alignment_known_p
    = known_alignment_for_access_p (dr0_info,
				    STMT_VINFO_VECTYPE (dr0_info->stmt));

  /* Ensure that all data refs can be vectorized after the peel.  */
  for (data_reference *dr : datarefs)
    {
      if (dr == dr0_info->dr)
	continue;

      dr_vec_info *dr_info = loop_vinfo->lookup_dr (dr);
      if (!vect_relevant_for_alignment_p (dr_info)
	  || vect_dr_aligned_if_peeled_dr_is (dr_info, dr0_info))
	continue;

      tree vectype = STMT_VINFO_VECTYPE (dr_info->stmt);
      int misalignment;
      unsigned HOST_WIDE_INT alignment;
      if (!dr0_alignment_known_p
	  || !known_alignment_for_access_p (dr_info, vectype)
	  || !DR_TARGET_ALIGNMENT (dr_info).is_constant (&alignment))
	misalignment = DR_MISALIGNMENT_UNKNOWN;
      else
	{
	  misalignment = dr_misalignment (dr_info, vectype);
	  misalignment += npeel * TREE_INT_CST_LOW (DR_STEP (dr_info->dr));
	  misalignment &= alignment - 1;
	}
      supportable_dr_alignment
	= vect_supportable_dr_alignment (loop_vinfo, dr_info, vectype,
					 misalignment);
      if (supportable_dr_alignment == dr_unaligned_unsupported)
	return false;
    }

  return true;
}

/* Compare two data-references DRA and DRB to group them into chunks
   with related alignment.  */

static int
dr_align_group_sort_cmp (const void *dra_, const void *drb_)
{
  data_reference_p dra = *(data_reference_p *)const_cast<void *>(dra_);
  data_reference_p drb = *(data_reference_p *)const_cast<void *>(drb_);
  int cmp;

  /* Stabilize sort.  */
  if (dra == drb)
    return 0;

  /* Ordering of DRs according to base.  */
  cmp = data_ref_compare_tree (DR_BASE_ADDRESS (dra),
			       DR_BASE_ADDRESS (drb));
  if (cmp != 0)
    return cmp;

  /* And according to DR_OFFSET.  */
  cmp = data_ref_compare_tree (DR_OFFSET (dra), DR_OFFSET (drb));
  if (cmp != 0)
    return cmp;

  /* And after step.  */
  cmp = data_ref_compare_tree (DR_STEP (dra), DR_STEP (drb));
  if (cmp != 0)
    return cmp;

  /* Then sort after DR_INIT.  In case of identical DRs sort after stmt UID.  */
  cmp = data_ref_compare_tree (DR_INIT (dra), DR_INIT (drb));
  if (cmp == 0)
    return gimple_uid (DR_STMT (dra)) < gimple_uid (DR_STMT (drb)) ? -1 : 1;
  return cmp;
}
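
/* For instance, in a hypothetical loop accessing a[i], a[i+1] and b[i],
   this comparator sorts the refs into the subgroups {a[i], a[i+1]} and
   {b[i]}: the a[] refs share DR_BASE_ADDRESS, DR_OFFSET and DR_STEP and
   differ only in DR_INIT, which is exactly the "related alignment"
   property the caller below relies on.  */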

/* Function vect_enhance_data_refs_alignment

   This pass will use loop versioning and loop peeling in order to enhance
   the alignment of data references in the loop.

   FOR NOW: we assume that whatever versioning/peeling takes place, only the
   original loop is to be vectorized.  Any other loops that are created by
   the transformations performed in this pass - are not supposed to be
   vectorized.  This restriction will be relaxed.

   This pass will require a cost model to guide it whether to apply peeling
   or versioning or a combination of the two.  For example, the scheme that
   Intel uses when given a loop with several memory accesses, is as follows:
   choose one memory access ('p') whose alignment you want to force by doing
   peeling.  Then, either (1) generate a loop in which 'p' is aligned and all
   other accesses are not necessarily aligned, or (2) use loop versioning to
   generate one loop in which all accesses are aligned, and another loop in
   which only 'p' is necessarily aligned.

   ("Automatic Intra-Register Vectorization for the Intel Architecture",
   Aart J.C. Bik, Milind Girkar, Paul M. Grey and Xinmin Tian, International
   Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)

   Devising a cost model is the most critical aspect of this work.  It will
   guide us on which access to peel for, whether to use loop versioning, how
   many versions to create, etc.  The cost model will probably consist of
   generic considerations as well as target specific considerations (on
   powerpc for example, misaligned stores are more painful than misaligned
   loads).

   Here are the general steps involved in alignment enhancements:

     -- original loop, before alignment analysis:
	for (i=0; i<N; i++){
	  x = q[i];		# DR_MISALIGNMENT(q) = unknown
	  p[i] = y;		# DR_MISALIGNMENT(p) = unknown
	}

     -- After vect_compute_data_refs_alignment:
	for (i=0; i<N; i++){
	  x = q[i];		# DR_MISALIGNMENT(q) = 3
	  p[i] = y;		# DR_MISALIGNMENT(p) = unknown
	}

     -- Possibility 1: we do loop versioning:
     if (p is aligned) {
	for (i=0; i<N; i++){	# loop 1A
	  x = q[i];		# DR_MISALIGNMENT(q) = 3
	  p[i] = y;		# DR_MISALIGNMENT(p) = 0
	}
     }
     else {
	for (i=0; i<N; i++){	# loop 1B
	  x = q[i];		# DR_MISALIGNMENT(q) = 3
	  p[i] = y;		# DR_MISALIGNMENT(p) = unaligned
	}
     }

     -- Possibility 2: we do loop peeling:
     for (i = 0; i < 3; i++){	# (scalar loop, not to be vectorized).
	x = q[i];
	p[i] = y;
     }
     for (i = 3; i < N; i++){	# loop 2A
	x = q[i];		# DR_MISALIGNMENT(q) = 0
	p[i] = y;		# DR_MISALIGNMENT(p) = unknown
     }

     -- Possibility 3: combination of loop peeling and versioning:
     for (i = 0; i < 3; i++){	# (scalar loop, not to be vectorized).
	x = q[i];
	p[i] = y;
     }
     if (p is aligned) {
	for (i = 3; i<N; i++){	# loop 3A
	  x = q[i];		# DR_MISALIGNMENT(q) = 0
	  p[i] = y;		# DR_MISALIGNMENT(p) = 0
	}
     }
     else {
	for (i = 3; i<N; i++){	# loop 3B
	  x = q[i];		# DR_MISALIGNMENT(q) = 0
	  p[i] = y;		# DR_MISALIGNMENT(p) = unaligned
	}
     }

   These loops are later passed to loop_transform to be vectorized.  The
   vectorizer will use the alignment information to guide the transformation
   (whether to generate regular loads/stores, or with special handling for
   misalignment).  */

opt_result
vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
{
  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  dr_vec_info *first_store = NULL;
  dr_vec_info *dr0_info = NULL;
  struct data_reference *dr;
  unsigned int i;
  bool do_peeling = false;
  bool do_versioning = false;
  unsigned int npeel = 0;
  bool one_misalignment_known = false;
  bool one_misalignment_unknown = false;
  bool one_dr_unsupportable = false;
  dr_vec_info *unsupportable_dr_info = NULL;
  unsigned int dr0_same_align_drs = 0, first_store_same_align_drs = 0;
  hash_table<peel_info_hasher> peeling_htab (1);

  DUMP_VECT_SCOPE ("vect_enhance_data_refs_alignment");

  /* Reset data so we can safely be called multiple times.  */
  LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).truncate (0);
  LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) = 0;

  if (LOOP_VINFO_DATAREFS (loop_vinfo).is_empty ())
    return opt_result::success ();

  /* Sort the vector of datarefs so DRs that have the same or dependent
     alignment are next to each other.  */
  auto_vec<data_reference_p> datarefs
    = LOOP_VINFO_DATAREFS (loop_vinfo).copy ();
  datarefs.qsort (dr_align_group_sort_cmp);

  /* Compute the number of DRs that become aligned when we peel
     a dataref so it becomes aligned.  */
  auto_vec<unsigned> n_same_align_refs (datarefs.length ());
  n_same_align_refs.quick_grow_cleared (datarefs.length ());
  unsigned i0;
  for (i0 = 0; i0 < datarefs.length (); ++i0)
    if (DR_BASE_ADDRESS (datarefs[i0]))
      break;
  for (i = i0 + 1; i <= datarefs.length (); ++i)
    {
      if (i == datarefs.length ()
	  || !operand_equal_p (DR_BASE_ADDRESS (datarefs[i0]),
			       DR_BASE_ADDRESS (datarefs[i]), 0)
	  || !operand_equal_p (DR_OFFSET (datarefs[i0]),
			       DR_OFFSET (datarefs[i]), 0)
	  || !operand_equal_p (DR_STEP (datarefs[i0]),
			       DR_STEP (datarefs[i]), 0))
	{
	  /* The subgroup [i0, i-1] now only differs in DR_INIT and
	     possibly DR_TARGET_ALIGNMENT.  Still the whole subgroup
	     will get known misalignment if we align one of the refs
	     with the largest DR_TARGET_ALIGNMENT.  */
	  for (unsigned j = i0; j < i; ++j)
	    {
	      dr_vec_info *dr_infoj = loop_vinfo->lookup_dr (datarefs[j]);
	      for (unsigned k = i0; k < i; ++k)
		{
		  if (k == j)
		    continue;
		  dr_vec_info *dr_infok = loop_vinfo->lookup_dr (datarefs[k]);
		  if (vect_dr_aligned_if_related_peeled_dr_is (dr_infok,
							       dr_infoj))
		    n_same_align_refs[j]++;
		}
	    }
	  i0 = i;
	}
    }

  /* While cost model enhancements are expected in the future, the high level
     view of the code at this time is as follows:

     A) If there is a misaligned access then see if peeling to align
	this access can make all data references satisfy
	vect_supportable_dr_alignment.  If so, update data structures
	as needed and return true.

     B) If peeling wasn't possible and there is a data reference with an
	unknown misalignment that does not satisfy vect_supportable_dr_alignment
	then see if loop versioning checks can be used to make all data
	references satisfy vect_supportable_dr_alignment.  If so, update
	data structures as needed and return true.

     C) If neither peeling nor versioning were successful then return false if
	any data reference does not satisfy vect_supportable_dr_alignment.

     D) Return true (all data references satisfy vect_supportable_dr_alignment).

     Note, Possibility 3 above (which is peeling and versioning together) is
     not being done at this time.  */

  /* (1) Peeling to force alignment.  */

  /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
     Considerations:
      + How many accesses will become aligned due to the peeling
      - How many accesses will become unaligned due to the peeling,
	and the cost of misaligned accesses.
      - The cost of peeling (the extra runtime checks, the increase
	in code size).  */

  FOR_EACH_VEC_ELT (datarefs, i, dr)
    {
      dr_vec_info *dr_info = loop_vinfo->lookup_dr (dr);
      if (!vect_relevant_for_alignment_p (dr_info))
	continue;

      stmt_vec_info stmt_info = dr_info->stmt;
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      do_peeling = vector_alignment_reachable_p (dr_info);
      if (do_peeling)
	{
	  if (known_alignment_for_access_p (dr_info, vectype))
	    {
	      unsigned int npeel_tmp = 0;
	      bool negative = tree_int_cst_compare (DR_STEP (dr),
						    size_zero_node) < 0;

	      /* If known_alignment_for_access_p then we have set
		 DR_MISALIGNMENT, which is only done if we know it at
		 compile time, so it is safe to assume target alignment
		 is constant.  */
	      unsigned int target_align
		= DR_TARGET_ALIGNMENT (dr_info).to_constant ();
	      unsigned HOST_WIDE_INT dr_size = vect_get_scalar_dr_size (dr_info);
	      poly_int64 off = 0;
	      if (negative)
		off = (TYPE_VECTOR_SUBPARTS (vectype) - 1) * -dr_size;
	      unsigned int mis = dr_misalignment (dr_info, vectype, off);
	      mis = negative ? mis : -mis;
	      if (mis != 0)
		npeel_tmp = (mis & (target_align - 1)) / dr_size;
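
	      /* Illustrative numbers: with target_align 16, dr_size 4 and
		 a positive-step access misaligned by 12 bytes, mis is -12,
		 (-12 & 15) is 4 and npeel_tmp is 1: peeling one scalar
		 iteration advances the access by 4 bytes to a 16-byte
		 boundary.  */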

	      /* For multiple types, it is possible that the bigger type access
		 will have more than one peeling option.  E.g., a loop with two
		 types: one of size (vector size / 4), and the other one of
		 size (vector size / 8).  The vectorization factor will be 8.
		 If both accesses are misaligned by 3, the first one needs one
		 scalar iteration to be aligned, and the second one needs 5.
		 But the first one will be aligned also by peeling 5 scalar
		 iterations, and in that case both accesses will be aligned.
		 Hence, except for the immediate peeling amount, we also want
		 to try to add full vector size, while we don't exceed
		 the vectorization factor.
		 We do this automatically for the cost model, since we
		 calculate the cost for every peeling option.  */
	      poly_uint64 nscalars = npeel_tmp;
	      if (unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo)))
		{
		  poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
		  nscalars = (STMT_SLP_TYPE (stmt_info)
			      ? vf * DR_GROUP_SIZE (stmt_info) : vf);
		}

	      /* Save info about DR in the hash table.  Also include peeling
		 amounts according to the explanation above.  Indicate
		 the alignment status when the ref is not aligned.
		 ??? Rather than using unknown alignment here we should
		 prune all entries from the peeling hashtable which cause
		 DRs to be not supported.  */
	      bool supportable_if_not_aligned
		= vect_supportable_dr_alignment
		    (loop_vinfo, dr_info, vectype, DR_MISALIGNMENT_UNKNOWN);
	      while (known_le (npeel_tmp, nscalars))
		{
		  vect_peeling_hash_insert (&peeling_htab, loop_vinfo,
					    dr_info, npeel_tmp,
					    supportable_if_not_aligned);
		  npeel_tmp += MAX (1, target_align / dr_size);
		}

	      one_misalignment_known = true;
	    }
	  else
	    {
	      /* If we don't know any misalignment values, we prefer
		 peeling for the data-ref that has the maximum number of
		 data-refs with the same alignment, unless the target
		 prefers to align stores over loads.  */
	      unsigned same_align_drs = n_same_align_refs[i];
	      if (!dr0_info
		  || dr0_same_align_drs < same_align_drs)
		{
		  dr0_same_align_drs = same_align_drs;
		  dr0_info = dr_info;
		}
	      /* For data-refs with the same number of related
		 accesses prefer the one where the misalign
		 computation will be invariant in the outermost loop.  */
	      else if (dr0_same_align_drs == same_align_drs)
		{
		  class loop *ivloop0, *ivloop;
		  ivloop0 = outermost_invariant_loop_for_expr
		    (loop, DR_BASE_ADDRESS (dr0_info->dr));
		  ivloop = outermost_invariant_loop_for_expr
		    (loop, DR_BASE_ADDRESS (dr));
		  if ((ivloop && !ivloop0)
		      || (ivloop && ivloop0
			  && flow_loop_nested_p (ivloop, ivloop0)))
		    dr0_info = dr_info;
		}

	      one_misalignment_unknown = true;

	      /* Check for data refs with unsupportable alignment that
		 can be peeled.  */
	      enum dr_alignment_support supportable_dr_alignment
		= vect_supportable_dr_alignment (loop_vinfo, dr_info, vectype,
						 DR_MISALIGNMENT_UNKNOWN);
	      if (supportable_dr_alignment == dr_unaligned_unsupported)
		{
		  one_dr_unsupportable = true;
		  unsupportable_dr_info = dr_info;
		}

	      if (!first_store && DR_IS_WRITE (dr))
		{
		  first_store = dr_info;
		  first_store_same_align_drs = same_align_drs;
		}
	    }
	}
      else
	{
	  if (!aligned_access_p (dr_info, vectype))
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "vector alignment may not be reachable\n");
	      break;
	    }
	}
    }

  /* Check if we can possibly peel the loop.  */
  if (!vect_can_advance_ivs_p (loop_vinfo)
      || !slpeel_can_duplicate_loop_p (loop, single_exit (loop))
      || loop->inner)
    do_peeling = false;

  struct _vect_peel_extended_info peel_for_known_alignment;
  struct _vect_peel_extended_info peel_for_unknown_alignment;
  struct _vect_peel_extended_info best_peel;

  peel_for_unknown_alignment.inside_cost = INT_MAX;
  peel_for_unknown_alignment.outside_cost = INT_MAX;
  peel_for_unknown_alignment.peel_info.count = 0;

  if (do_peeling
      && one_misalignment_unknown)
    {
      /* Check if the target requires to prefer stores over loads, i.e., if
	 misaligned stores are more expensive than misaligned loads (taking
	 drs with same alignment into account).  */
      unsigned int load_inside_cost = 0;
      unsigned int load_outside_cost = 0;
      unsigned int store_inside_cost = 0;
      unsigned int store_outside_cost = 0;
      unsigned int estimated_npeels = vect_vf_for_cost (loop_vinfo) / 2;

      stmt_vector_for_cost dummy;
      dummy.create (2);
      vect_get_peeling_costs_all_drs (loop_vinfo, dr0_info,
				      &load_inside_cost,
				      &load_outside_cost,
				      &dummy, &dummy, estimated_npeels);
      dummy.release ();

      if (first_store)
	{
	  dummy.create (2);
	  vect_get_peeling_costs_all_drs (loop_vinfo, first_store,
					  &store_inside_cost,
					  &store_outside_cost,
					  &dummy, &dummy,
					  estimated_npeels);
	  dummy.release ();
	}
      else
	{
	  store_inside_cost = INT_MAX;
	  store_outside_cost = INT_MAX;
	}

      if (load_inside_cost > store_inside_cost
	  || (load_inside_cost == store_inside_cost
	      && load_outside_cost > store_outside_cost))
	{
	  dr0_info = first_store;
	  dr0_same_align_drs = first_store_same_align_drs;
	  peel_for_unknown_alignment.inside_cost = store_inside_cost;
	  peel_for_unknown_alignment.outside_cost = store_outside_cost;
	}
      else
	{
	  peel_for_unknown_alignment.inside_cost = load_inside_cost;
	  peel_for_unknown_alignment.outside_cost = load_outside_cost;
	}

      stmt_vector_for_cost prologue_cost_vec, epilogue_cost_vec;
      prologue_cost_vec.create (2);
      epilogue_cost_vec.create (2);

      int dummy2;
      peel_for_unknown_alignment.outside_cost += vect_get_known_peeling_cost
	(loop_vinfo, estimated_npeels, &dummy2,
	 &LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo),
	 &prologue_cost_vec, &epilogue_cost_vec);

      prologue_cost_vec.release ();
      epilogue_cost_vec.release ();

      peel_for_unknown_alignment.peel_info.count = dr0_same_align_drs + 1;
    }

  peel_for_unknown_alignment.peel_info.npeel = 0;
  peel_for_unknown_alignment.peel_info.dr_info = dr0_info;

  best_peel = peel_for_unknown_alignment;

  peel_for_known_alignment.inside_cost = INT_MAX;
  peel_for_known_alignment.outside_cost = INT_MAX;
  peel_for_known_alignment.peel_info.count = 0;
  peel_for_known_alignment.peel_info.dr_info = NULL;

  if (do_peeling && one_misalignment_known)
    {
      /* Peeling is possible, but there is no data access that is not supported
	 unless aligned.  So we try to choose the best possible peeling from
	 the hash table.  */
      peel_for_known_alignment = vect_peeling_hash_choose_best_peeling
	(&peeling_htab, loop_vinfo);
    }

  /* Compare costs of peeling for known and unknown alignment.  */
  if (peel_for_known_alignment.peel_info.dr_info != NULL
      && peel_for_unknown_alignment.inside_cost
	 >= peel_for_known_alignment.inside_cost)
    {
      best_peel = peel_for_known_alignment;

      /* If the best peeling for known alignment has NPEEL == 0, perform no
	 peeling at all except if there is an unsupportable dr that we can
	 align.  */
      if (best_peel.peel_info.npeel == 0 && !one_dr_unsupportable)
	do_peeling = false;
    }

  /* If there is an unsupportable data ref, prefer this over all choices so far
     since we'd have to discard a chosen peeling except when it accidentally
     aligned the unsupportable data ref.  */
  if (one_dr_unsupportable)
    dr0_info = unsupportable_dr_info;
  else if (do_peeling)
    {
      /* Calculate the penalty for no peeling, i.e. leaving everything as-is.
	 TODO: Use nopeel_outside_cost or get rid of it?  */
      unsigned nopeel_inside_cost = 0;
      unsigned nopeel_outside_cost = 0;

      stmt_vector_for_cost dummy;
      dummy.create (2);
      vect_get_peeling_costs_all_drs (loop_vinfo, NULL, &nopeel_inside_cost,
				      &nopeel_outside_cost, &dummy, &dummy, 0);
      dummy.release ();

      /* Add epilogue costs.  As we do not peel for alignment here, no prologue
	 costs will be recorded.  */
      stmt_vector_for_cost prologue_cost_vec, epilogue_cost_vec;
      prologue_cost_vec.create (2);
      epilogue_cost_vec.create (2);

      int dummy2;
      nopeel_outside_cost += vect_get_known_peeling_cost
	(loop_vinfo, 0, &dummy2,
	 &LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo),
	 &prologue_cost_vec, &epilogue_cost_vec);

      prologue_cost_vec.release ();
      epilogue_cost_vec.release ();

      npeel = best_peel.peel_info.npeel;
      dr0_info = best_peel.peel_info.dr_info;

      /* If not peeling is no more expensive than the best peeling found
	 so far, don't perform any peeling.  */
      if (nopeel_inside_cost <= best_peel.inside_cost)
	do_peeling = false;
    }
2228 | |||||
  if (do_peeling)
    {
      stmt_vec_info stmt_info = dr0_info->stmt;
      if (known_alignment_for_access_p (dr0_info,
                                        STMT_VINFO_VECTYPE (stmt_info)))
        {
          bool negative = tree_int_cst_compare (DR_STEP (dr0_info->dr),
                                                size_zero_node) < 0;
          if (!npeel)
            {
              /* Since it's known at compile time, compute the number of
                 iterations in the peeled loop (the peeling factor) for use in
                 updating DR_MISALIGNMENT values.  The peeling factor is the
                 vectorization factor minus the misalignment as an element
                 count.  */
              tree vectype = STMT_VINFO_VECTYPE (stmt_info);
              poly_int64 off = 0;
              if (negative)
                off = ((TYPE_VECTOR_SUBPARTS (vectype) - 1)
                       * -TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype))));
              unsigned int mis
                = dr_misalignment (dr0_info, vectype, off);
              mis = negative ? mis : -mis;
              /* If known_alignment_for_access_p then we have set
                 DR_MISALIGNMENT, which is only done if we know it at compile
                 time, so it is safe to assume the target alignment is
                 constant.  */
              unsigned int target_align =
                DR_TARGET_ALIGNMENT (dr0_info).to_constant ();
              npeel = ((mis & (target_align - 1))
                       / vect_get_scalar_dr_size (dr0_info));
            }

          /* For interleaved data access every iteration accesses all the
             members of the group, therefore we divide the number of iterations
             by the group size.  */
          if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
            npeel /= DR_GROUP_SIZE (stmt_info);

          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "Try peeling by %d\n", npeel);
        }
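
      /* Worked example (illustrative, not from the sources): with a constant
         target alignment of 16 bytes, 4-byte elements and a positive step,
         dr_misalignment returning 8 gives mis = (unsigned) -8 after the
         negation above, so npeel = ((unsigned) -8 & 15) / 4 = 8 / 4 = 2:
         peeling two scalar iterations advances the access from byte
         offset 8 to 16.  */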

      /* Ensure that all datarefs can be vectorized after the peel.  */
      if (!vect_peeling_supportable (loop_vinfo, dr0_info, npeel))
        do_peeling = false;

      /* Check if all datarefs are supportable and log.  */
      if (do_peeling
          && npeel == 0
          && known_alignment_for_access_p (dr0_info,
                                           STMT_VINFO_VECTYPE (stmt_info)))
        return opt_result::success ();

      /* Cost model #1 - honor --param vect-max-peeling-for-alignment.  */
      if (do_peeling)
        {
          unsigned max_allowed_peel
            = param_vect_max_peeling_for_alignment;
          if (loop_cost_model (loop) <= VECT_COST_MODEL_CHEAP)
            max_allowed_peel = 0;
          if (max_allowed_peel != (unsigned)-1)
            {
              unsigned max_peel = npeel;
              if (max_peel == 0)
                {
                  poly_uint64 target_align = DR_TARGET_ALIGNMENT (dr0_info);
                  unsigned HOST_WIDE_INT target_align_c;
                  if (target_align.is_constant (&target_align_c))
                    max_peel =
                      target_align_c / vect_get_scalar_dr_size (dr0_info) - 1;
                  else
                    {
                      do_peeling = false;
                      if (dump_enabled_p ())
                        dump_printf_loc (MSG_NOTE, vect_location,
                                         "Disable peeling, max peels set and vector"
                                         " alignment unknown\n");
                    }
                }
              if (max_peel > max_allowed_peel)
                {
                  do_peeling = false;
                  if (dump_enabled_p ())
                    dump_printf_loc (MSG_NOTE, vect_location,
                                     "Disable peeling, max peels reached: %d\n",
                                     max_peel);
                }
            }
        }
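
      /* Illustrative example (not from the sources): when NPEEL is still 0
         because the misalignment is only known at runtime, the worst case
         with a constant 16-byte target alignment and 4-byte scalar accesses
         is max_peel = 16 / 4 - 1 = 3 peeled iterations, which is what gets
         compared against --param vect-max-peeling-for-alignment.  */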

      /* Cost model #2 - if peeling may result in a remaining loop not
         iterating enough to be vectorized then do not peel.  Since this
         is a cost heuristic rather than a correctness decision, use the
         most likely runtime value for variable vectorization factors.  */
      if (do_peeling
          && LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
        {
          unsigned int assumed_vf = vect_vf_for_cost (loop_vinfo);
          unsigned int max_peel = npeel == 0 ? assumed_vf - 1 : npeel;
          if ((unsigned HOST_WIDE_INT) LOOP_VINFO_INT_NITERS (loop_vinfo)
              < assumed_vf + max_peel)
            do_peeling = false;
        }
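
      /* Illustrative example (not from the sources): with an assumed VF of 4
         and NPEEL not yet known (max_peel = 4 - 1 = 3), a loop known to run
         fewer than 4 + 3 = 7 iterations could not both peel and still
         execute a full vector iteration, so peeling is disabled.  */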

      if (do_peeling)
        {
          /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
             If the misalignment of DR_i is identical to that of dr0 then set
             DR_MISALIGNMENT (DR_i) to zero.  If the misalignment of DR_i and
             dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
             by the peeling factor times the element size of DR_i (MOD the
             vectorization factor times the size).  Otherwise, the
             misalignment of DR_i must be set to unknown.  */
          FOR_EACH_VEC_ELT (datarefs, i, dr)
            if (dr != dr0_info->dr)
              {
                dr_vec_info *dr_info = loop_vinfo->lookup_dr (dr);
                if (!vect_relevant_for_alignment_p (dr_info))
                  continue;

                vect_update_misalignment_for_peel (dr_info, dr0_info, npeel);
              }

          LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0_info;
          if (npeel)
            LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) = npeel;
          else
            LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) = -1;
          SET_DR_MISALIGNMENT (dr0_info,
                               vect_dr_misalign_for_aligned_access (dr0_info));
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location,
                               "Alignment of access forced using peeling.\n");
              dump_printf_loc (MSG_NOTE, vect_location,
                               "Peeling for alignment will be applied.\n");
            }

          /* The inside-loop cost will be accounted for in vectorizable_load
             and vectorizable_store correctly with adjusted alignments.
             Drop the body_cst_vec on the floor here.  */
          return opt_result::success ();
        }
    }

  /* (2) Versioning to force alignment.  */

  /* Try versioning if:
     1) optimize loop for speed and the cost-model is not cheap
     2) there is at least one unsupported misaligned data ref with an unknown
        misalignment, and
     3) all misaligned data refs with a known misalignment are supported, and
     4) the number of runtime alignment checks is within reason.  */

  do_versioning
    = (optimize_loop_nest_for_speed_p (loop)
       && !loop->inner /* FORNOW */
       && loop_cost_model (loop) > VECT_COST_MODEL_CHEAP);

  if (do_versioning)
    {
      FOR_EACH_VEC_ELT (datarefs, i, dr)
        {
          dr_vec_info *dr_info = loop_vinfo->lookup_dr (dr);
          if (!vect_relevant_for_alignment_p (dr_info))
            continue;

          stmt_vec_info stmt_info = dr_info->stmt;
          if (STMT_VINFO_STRIDED_P (stmt_info))
            {
              do_versioning = false;
              break;
            }

          tree vectype = STMT_VINFO_VECTYPE (stmt_info);
          bool negative = tree_int_cst_compare (DR_STEP (dr),
                                                size_zero_node) < 0;
          poly_int64 off = 0;
          if (negative)
            off = ((TYPE_VECTOR_SUBPARTS (vectype) - 1)
                   * -TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype))));
          int misalignment;
          if ((misalignment = dr_misalignment (dr_info, vectype, off)) == 0)
            continue;

          enum dr_alignment_support supportable_dr_alignment
            = vect_supportable_dr_alignment (loop_vinfo, dr_info, vectype,
                                             misalignment);
          if (supportable_dr_alignment == dr_unaligned_unsupported)
            {
              if (misalignment != DR_MISALIGNMENT_UNKNOWN
                  || (LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).length ()
                      >= (unsigned) param_vect_max_version_for_alignment_checks))
                {
                  do_versioning = false;
                  break;
                }

              /* At present we don't support versioning for alignment
                 with variable VF, since there's no guarantee that the
                 VF is a power of two.  We could relax this if we added
                 a way of enforcing a power-of-two size.  */
              unsigned HOST_WIDE_INT size;
              if (!GET_MODE_SIZE (TYPE_MODE (vectype)).is_constant (&size))
                {
                  do_versioning = false;
                  break;
                }

              /* Forcing alignment in the first iteration is no good if
                 we don't keep it across iterations.  For now, just disable
                 versioning in this case.
                 ??? We could actually unroll the loop to achieve the required
                 overall step alignment, and forcing the alignment could be
                 done by doing some iterations of the non-vectorized loop.  */
              if (!multiple_p (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
                               * DR_STEP_ALIGNMENT (dr),
                               DR_TARGET_ALIGNMENT (dr_info)))
                {
                  do_versioning = false;
                  break;
                }

              /* The rightmost bits of an aligned address must be zeros.
                 Construct the mask needed for this test.  For example,
                 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
                 mask must be 15 = 0xf.  */
              int mask = size - 1;

              /* FORNOW: use the same mask to test all potentially unaligned
                 references in the loop.  */
              if (LOOP_VINFO_PTR_MASK (loop_vinfo)
                  && LOOP_VINFO_PTR_MASK (loop_vinfo) != mask)
                {
                  do_versioning = false;
                  break;
                }

              LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
              LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).safe_push (stmt_info);
            }
        }

      /* Versioning requires at least one misaligned data reference.  */
      if (!LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
        do_versioning = false;
      else if (!do_versioning)
        LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).truncate (0);
    }

  if (do_versioning)
    {
      const vec<stmt_vec_info> &may_misalign_stmts
        = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
      stmt_vec_info stmt_info;

      /* It can now be assumed that the data references in the statements
         in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
         of the loop being vectorized.  */
      FOR_EACH_VEC_ELT (may_misalign_stmts, i, stmt_info)
        {
          dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
          SET_DR_MISALIGNMENT (dr_info,
                               vect_dr_misalign_for_aligned_access (dr_info));
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "Alignment of access forced using versioning.\n");
        }

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "Versioning for alignment will be applied.\n");

      /* Peeling and versioning can't be done together at this time.  */
      gcc_assert (! (do_peeling && do_versioning));

      return opt_result::success ();
    }

  /* This point is reached if neither peeling nor versioning is being done.  */
  gcc_assert (! (do_peeling || do_versioning));

  return opt_result::success ();
}
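
/* Illustrative sketch (not part of GCC): the shape of the runtime test that
   versioning for alignment arranges for the statements collected in
   LOOP_VINFO_MAY_MISALIGN_STMTS.  All potentially misaligned addresses are
   OR'ed together and tested against the shared LOOP_VINFO_PTR_MASK; the
   vectorized version of the loop runs only when the result is zero.  The
   helper name and the use of unsigned long for addresses are assumptions
   made for this example.  */

static bool
example_all_aligned (const unsigned long *addrs, unsigned n,
                     unsigned long mask)
{
  unsigned long ored = 0;
  for (unsigned i = 0; i < n; ++i)
    ored |= addrs[i];
  /* For a 16-byte vector mode MASK is 15; a zero result means every
     address has its low four bits clear, i.e. is 16-byte aligned.  */
  return (ored & mask) == 0;
}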


/* Function vect_analyze_data_refs_alignment

   Analyze the alignment of the data-references in the loop.
   Return FALSE if a data reference is found that cannot be vectorized.  */

opt_result
vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
{
  DUMP_VECT_SCOPE ("vect_analyze_data_refs_alignment");

  vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  struct data_reference *dr;
  unsigned int i;

  vect_record_base_alignments (loop_vinfo);
  FOR_EACH_VEC_ELT (datarefs, i, dr)
    {
      dr_vec_info *dr_info = loop_vinfo->lookup_dr (dr);
      if (STMT_VINFO_VECTORIZABLE (dr_info->stmt))
        {
          if (STMT_VINFO_GROUPED_ACCESS (dr_info->stmt)
              && DR_GROUP_FIRST_ELEMENT (dr_info->stmt) != dr_info->stmt)
            continue;
          vect_compute_data_ref_alignment (loop_vinfo, dr_info,
                                           STMT_VINFO_VECTYPE (dr_info->stmt));
        }
    }

  return opt_result::success ();
}


/* Analyze alignment of DRs of stmts in NODE.  */

static bool
vect_slp_analyze_node_alignment (vec_info *vinfo, slp_tree node)
{
  /* Alignment is maintained in the first element of the group.  */
  stmt_vec_info first_stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
  first_stmt_info = DR_GROUP_FIRST_ELEMENT (first_stmt_info);
  dr_vec_info *dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
  tree vectype = SLP_TREE_VECTYPE (node);
  poly_uint64 vector_alignment
    = exact_div (targetm.vectorize.preferred_vector_alignment (vectype),
                 BITS_PER_UNIT);
  if (dr_info->misalignment == DR_MISALIGNMENT_UNINITIALIZED)
    vect_compute_data_ref_alignment (vinfo, dr_info, SLP_TREE_VECTYPE (node));
  /* Re-analyze alignment when we're facing a vectorization with a bigger
     alignment requirement.  */
  else if (known_lt (dr_info->target_alignment, vector_alignment))
    {
      poly_uint64 old_target_alignment = dr_info->target_alignment;
      int old_misalignment = dr_info->misalignment;
      vect_compute_data_ref_alignment (vinfo, dr_info, SLP_TREE_VECTYPE (node));
      /* But keep knowledge about a smaller alignment.  */
      if (old_misalignment != DR_MISALIGNMENT_UNKNOWN
          && dr_info->misalignment == DR_MISALIGNMENT_UNKNOWN)
        {
          dr_info->target_alignment = old_target_alignment;
          dr_info->misalignment = old_misalignment;
        }
    }
  /* Should we ever face unordered target alignments, the first one analyzed
     wins and the other becomes unknown in dr_misalignment.  */
  return true;
}
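
/* Illustrative example (not from the sources): a DR first analyzed for a
   64-bit vector type may end up with target_alignment == 8 and a known
   misalignment of 0.  If the same node is later considered with a 128-bit
   vector type, vector_alignment == 16 triggers the re-analysis above; if
   the misalignment relative to 16 bytes turns out to be unknown, the
   previously known "aligned to 8" information is restored instead of being
   discarded.  */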

/* Function vect_slp_analyze_instance_alignment

   Analyze the alignment of the data-references in the SLP instance.
   Return FALSE if a data reference is found that cannot be vectorized.  */

bool
vect_slp_analyze_instance_alignment (vec_info *vinfo,
                                     slp_instance instance)
{
  DUMP_VECT_SCOPE ("vect_slp_analyze_instance_alignment");

  slp_tree node;
  unsigned i;
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), i, node)
    if (! vect_slp_analyze_node_alignment (vinfo, node))
      return false;

  if (SLP_INSTANCE_KIND (instance) == slp_inst_kind_store
      && ! vect_slp_analyze_node_alignment
             (vinfo, SLP_INSTANCE_TREE (instance)))
    return false;

  return true;
}


/* Analyze groups of accesses: check that DR_INFO belongs to a group of
   accesses of legal size, step, etc.  Detect gaps, single element
   interleaving, and other special cases.  Set grouped access info.
   Collect groups of strided stores for further use in SLP analysis.
   Worker for vect_analyze_group_access.  */

static bool
vect_analyze_group_access_1 (vec_info *vinfo, dr_vec_info *dr_info)
{
  data_reference *dr = dr_info->dr;
  tree step = DR_STEP (dr);
  tree scalar_type = TREE_TYPE (DR_REF (dr));
  HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
  stmt_vec_info stmt_info = dr_info->stmt;
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
  bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
  HOST_WIDE_INT dr_step = -1;
  HOST_WIDE_INT groupsize, last_accessed_element = 1;
  bool slp_impossible = false;

  /* For interleaving, GROUPSIZE is STEP counted in elements, i.e., the
     size of the interleaving group (including gaps).  */
  if (tree_fits_shwi_p (step))
    {
      dr_step = tree_to_shwi (step);
      /* Check that STEP is a multiple of type size.  Otherwise there is
         a non-element-sized gap at the end of the group which we
         cannot represent in DR_GROUP_GAP or DR_GROUP_SIZE.
         ??? As we can handle non-constant step fine here we should
         simply remove uses of DR_GROUP_GAP between the last and first
         element and instead rely on DR_STEP.  DR_GROUP_SIZE then would
         simply not include that gap.  */
      if ((dr_step % type_size) != 0)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "Step %T is not a multiple of the element size"
                             " for %T\n",
                             step, DR_REF (dr));
          return false;
        }
      groupsize = absu_hwi (dr_step) / type_size;
    }
  else
    groupsize = 0;

  /* A non-consecutive access is possible only as part of an interleaving
     group.  */
  if (!DR_GROUP_FIRST_ELEMENT (stmt_info))
    {
      /* Check whether this DR is part of an interleaving group, being the
         single element of the group that is accessed in the loop.  */

      /* Gaps are supported only for loads.  STEP must be a multiple of the
         type size.  */
      if (DR_IS_READ (dr)
          && (dr_step % type_size) == 0
          && groupsize > 0
          /* This could be UINT_MAX but as we are generating code in a very
             inefficient way we have to cap earlier.
             See PR91403 for example.  */
          && groupsize <= 4096)
        {
          DR_GROUP_FIRST_ELEMENT (stmt_info) = stmt_info;
          DR_GROUP_SIZE (stmt_info) = groupsize;
          DR_GROUP_GAP (stmt_info) = groupsize - 1;
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "Detected single element interleaving %T"
                             " step %T\n",
                             DR_REF (dr), step);

          return true;
        }

      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not consecutive access %G", stmt_info->stmt);

      if (bb_vinfo)
        {
          /* Mark the statement as unvectorizable.  */
          STMT_VINFO_VECTORIZABLE (stmt_info) = false;
          return true;
        }

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location, "using strided accesses\n");
      STMT_VINFO_STRIDED_P (stmt_info) = true;
      return true;
    }

  if (DR_GROUP_FIRST_ELEMENT (stmt_info) == stmt_info)
    {
      /* First stmt in the interleaving chain.  Check the chain.  */
      stmt_vec_info next = DR_GROUP_NEXT_ELEMENT (stmt_info);
      struct data_reference *data_ref = dr;
      unsigned int count = 1;
      tree prev_init = DR_INIT (data_ref);
      HOST_WIDE_INT diff, gaps = 0;

      /* By construction, all group members have INTEGER_CST DR_INITs.  */
      while (next)
        {
          /* We never have the same DR multiple times.  */
          gcc_assert (tree_int_cst_compare (DR_INIT (data_ref),
                                            DR_INIT (STMT_VINFO_DATA_REF (next))) != 0);

          data_ref = STMT_VINFO_DATA_REF (next);

          /* All group members have the same STEP by construction.  */
          gcc_checking_assert (operand_equal_p (DR_STEP (data_ref), step, 0));

          /* Check that the distance between two accesses is equal to the type
             size.  Otherwise, we have gaps.  */
          diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
                  - TREE_INT_CST_LOW (prev_init)) / type_size;
          if (diff < 1 || diff > UINT_MAX)
            {
              /* For artificial testcases with array accesses with large
                 constant indices we can run into overflow issues which
                 can end up fooling the groupsize constraint below so
                 check the individual gaps (which are represented as
                 unsigned int) as well.  */
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "interleaved access with gap larger "
                                 "than representable\n");
              return false;
            }
          if (diff != 1)
            {
              /* FORNOW: SLP of accesses with gaps is not supported.  */
              slp_impossible = true;
              if (DR_IS_WRITE (data_ref))
                {
                  if (dump_enabled_p ())
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "interleaved store with gaps\n");
                  return false;
                }

              gaps += diff - 1;
            }

          last_accessed_element += diff;

          /* Store the gap from the previous member of the group.  If there
             is no gap in the access, DR_GROUP_GAP is always 1.  */
          DR_GROUP_GAP (next) = diff;

          prev_init = DR_INIT (data_ref);
          next = DR_GROUP_NEXT_ELEMENT (next);
          /* Count the number of data-refs in the chain.  */
          count++;
        }

      if (groupsize == 0)
        groupsize = count + gaps;

      /* This could be UINT_MAX but as we are generating code in a very
         inefficient way we have to cap earlier.  See PR78699 for example.  */
      if (groupsize > 4096)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "group is too large\n");
          return false;
        }

      /* Check that the size of the interleaving is equal to count for stores,
         i.e., that there are no gaps.  */
      if (groupsize != count
          && !DR_IS_READ (dr))
        {
          groupsize = count;
          STMT_VINFO_STRIDED_P (stmt_info) = true;
        }

      /* If there is a gap after the last load in the group it is the
         difference between the groupsize and the last accessed
         element.
         When there is no gap, this difference should be 0.  */
      DR_GROUP_GAP (stmt_info) = groupsize - last_accessed_element;

      DR_GROUP_SIZE (stmt_info) = groupsize;
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location,
                           "Detected interleaving ");
          if (DR_IS_READ (dr))
            dump_printf (MSG_NOTE, "load ");
          else if (STMT_VINFO_STRIDED_P (stmt_info))
            dump_printf (MSG_NOTE, "strided store ");
          else
            dump_printf (MSG_NOTE, "store ");
          dump_printf (MSG_NOTE, "of size %u\n",
                       (unsigned)groupsize);
          dump_printf_loc (MSG_NOTE, vect_location, "\t%G", stmt_info->stmt);
          next = DR_GROUP_NEXT_ELEMENT (stmt_info);
          while (next)
            {
              if (DR_GROUP_GAP (next) != 1)
                dump_printf_loc (MSG_NOTE, vect_location,
                                 "\t<gap of %d elements>\n",
                                 DR_GROUP_GAP (next) - 1);
              dump_printf_loc (MSG_NOTE, vect_location, "\t%G", next->stmt);
              next = DR_GROUP_NEXT_ELEMENT (next);
            }
          if (DR_GROUP_GAP (stmt_info) != 0)
            dump_printf_loc (MSG_NOTE, vect_location,
                             "\t<gap of %d elements>\n",
                             DR_GROUP_GAP (stmt_info));
        }

      /* SLP: create an SLP data structure for every interleaving group of
         stores for further analysis in vect_analyze_slp.  */
      if (DR_IS_WRITE (dr) && !slp_impossible)
        {
          if (loop_vinfo)
            LOOP_VINFO_GROUPED_STORES (loop_vinfo).safe_push (stmt_info);
          if (bb_vinfo)
            BB_VINFO_GROUPED_STORES (bb_vinfo).safe_push (stmt_info);
        }
    }

  return true;
}
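
/* Illustrative sketch (not part of GCC): how COUNT and GAPS combine into
   GROUPSIZE for an interleaved load group when the step is not known.
   ELEM_IDXS holds the chain's DR_INITs divided by the element size, in
   increasing order; the function name and signature are assumptions made
   for this example.  */

static long
example_group_size (const long *elem_idxs, unsigned n)
{
  long gaps = 0;
  for (unsigned i = 1; i < n; ++i)
    /* A difference larger than one element is a hole inside the group.  */
    gaps += (elem_idxs[i] - elem_idxs[i - 1]) - 1;
  /* E.g. elements {0, 1, 3} give n == 3 and gaps == 1, hence a group size
     of 3 + 1 == 4, matching "groupsize = count + gaps" above.  */
  return (long) n + gaps;
}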

/* Analyze groups of accesses: check that DR_INFO belongs to a group of
   accesses of legal size, step, etc.  Detect gaps, single element
   interleaving, and other special cases.  Set grouped access info.
   Collect groups of strided stores for further use in SLP analysis.  */

static bool
vect_analyze_group_access (vec_info *vinfo, dr_vec_info *dr_info)
{
  if (!vect_analyze_group_access_1 (vinfo, dr_info))
    {
      /* Dissolve the group if present.  */
      stmt_vec_info stmt_info = DR_GROUP_FIRST_ELEMENT (dr_info->stmt);
      while (stmt_info)
        {
          stmt_vec_info next = DR_GROUP_NEXT_ELEMENT (stmt_info);
          DR_GROUP_FIRST_ELEMENT (stmt_info) = NULL;
          DR_GROUP_NEXT_ELEMENT (stmt_info) = NULL;
          stmt_info = next;
        }
      return false;
    }
  return true;
}

/* Analyze the access pattern of the data-reference DR_INFO.
   In case of non-consecutive accesses call vect_analyze_group_access() to
   analyze groups of accesses.  */

static bool
vect_analyze_data_ref_access (vec_info *vinfo, dr_vec_info *dr_info)
{
  data_reference *dr = dr_info->dr;
  tree step = DR_STEP (dr);
  tree scalar_type = TREE_TYPE (DR_REF (dr));
  stmt_vec_info stmt_info = dr_info->stmt;
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
  class loop *loop = NULL;

  if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    return true;

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  if (loop_vinfo && !step)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "bad data-ref access in loop\n");
      return false;
    }

  /* Allow loads with zero step in inner-loop vectorization.  */
  if (loop_vinfo && integer_zerop (step))
    {
      DR_GROUP_FIRST_ELEMENT (stmt_info) = NULL;
      if (!nested_in_vect_loop_p (loop, stmt_info))
        return DR_IS_READ (dr);
      /* Allow references with zero step for outer loops marked
         with pragma omp simd only - it guarantees absence of
         loop-carried dependencies between inner loop iterations.  */
      if (loop->safelen < 2)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "zero step in inner loop of nest\n");
          return false;
        }
    }

  if (loop && nested_in_vect_loop_p (loop, stmt_info))
    {
      /* Interleaved accesses are not yet supported within outer-loop
         vectorization for references in the inner-loop.  */
      DR_GROUP_FIRST_ELEMENT (stmt_info) = NULL;

      /* For the rest of the analysis we use the outer-loop step.  */
      step = STMT_VINFO_DR_STEP (stmt_info);
      if (integer_zerop (step))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "zero step in outer loop.\n");
          return DR_IS_READ (dr);
        }
    }

  /* Consecutive?  */
  if (TREE_CODE (step) == INTEGER_CST)
    {
      HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
      if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type))
          || (dr_step < 0
              && !compare_tree_int (TYPE_SIZE_UNIT (scalar_type), -dr_step)))
        {
          /* Mark that it is not interleaving.  */
          DR_GROUP_FIRST_ELEMENT (stmt_info) = NULL;
          return true;
        }
    }

  if (loop && nested_in_vect_loop_p (loop, stmt_info))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "grouped access in outer loop.\n");
      return false;
    }


  /* Assume this is a DR handled by the non-constant strided load case.  */
  if (TREE_CODE (step) != INTEGER_CST)
    return (STMT_VINFO_STRIDED_P (stmt_info)
            && (!STMT_VINFO_GROUPED_ACCESS (stmt_info)
                || vect_analyze_group_access (vinfo, dr_info)));

  /* Not consecutive access - check if it's a part of interleaving group.  */
  return vect_analyze_group_access (vinfo, dr_info);
}
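
/* Illustrative sketch (not part of GCC): the constant-step classification
   above, restated for byte-sized quantities.  An access is consecutive when
   the step equals the element size (or its negation for a reversed access),
   a zero step re-reads one element, and anything else must be handled as a
   grouped or strided access.  The enum and function are assumptions made
   for this example.  */

enum example_access_kind
{
  EXAMPLE_CONSECUTIVE,
  EXAMPLE_INVARIANT,
  EXAMPLE_GROUPED_OR_STRIDED
};

static example_access_kind
example_classify_step (long step_bytes, long elem_size_bytes)
{
  if (step_bytes == 0)
    return EXAMPLE_INVARIANT;
  if (step_bytes == elem_size_bytes || step_bytes == -elem_size_bytes)
    return EXAMPLE_CONSECUTIVE;
  return EXAMPLE_GROUPED_OR_STRIDED;
}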

/* Compare two data-references DRA and DRB to group them into chunks
   suitable for grouping.  */

static int
dr_group_sort_cmp (const void *dra_, const void *drb_)
{
  dr_vec_info *dra_info = *(dr_vec_info **)const_cast<void *>(dra_);
  dr_vec_info *drb_info = *(dr_vec_info **)const_cast<void *>(drb_);
  data_reference_p dra = dra_info->dr;
  data_reference_p drb = drb_info->dr;
  int cmp;

  /* Stabilize sort.  */
  if (dra == drb)
    return 0;

  /* DRs with different group IDs never belong to the same group.  */
  if (dra_info->group != drb_info->group)
    return dra_info->group < drb_info->group ? -1 : 1;

  /* Ordering of DRs according to base.  */
  cmp = data_ref_compare_tree (DR_BASE_ADDRESS (dra),
                               DR_BASE_ADDRESS (drb));
  if (cmp != 0)
    return cmp;

  /* And according to DR_OFFSET.  */
  cmp = data_ref_compare_tree (DR_OFFSET (dra), DR_OFFSET (drb));
  if (cmp != 0)
    return cmp;

  /* Put reads before writes.  */
  if (DR_IS_READ (dra) != DR_IS_READ (drb))
    return DR_IS_READ (dra) ? -1 : 1;

  /* Then sort after access size.  */
  cmp = data_ref_compare_tree (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))),
                               TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
  if (cmp != 0)
    return cmp;

  /* And after step.  */
  cmp = data_ref_compare_tree (DR_STEP (dra), DR_STEP (drb));
  if (cmp != 0)
    return cmp;

  /* Then sort after DR_INIT.  In case of identical DRs sort after stmt UID.  */
  cmp = data_ref_compare_tree (DR_INIT (dra), DR_INIT (drb));
  if (cmp == 0)
    return gimple_uid (DR_STMT (dra)) < gimple_uid (DR_STMT (drb)) ? -1 : 1;
  return cmp;
}
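
/* Illustrative sketch (not part of GCC): the lexicographic comparator
   pattern used by dr_group_sort_cmp, shown on a toy record so the
   tie-breaking chain is visible in isolation.  The struct and key names
   are assumptions made for this example; usage would be
   qsort (refs, n, sizeof (example_ref), example_ref_cmp).  */

struct example_ref { int group; long base; long init; unsigned uid; };

static int
example_ref_cmp (const void *a_, const void *b_)
{
  const example_ref *a = static_cast<const example_ref *> (a_);
  const example_ref *b = static_cast<const example_ref *> (b_);
  /* Most significant key first; fall through to the next key on a tie.  */
  if (a->group != b->group)
    return a->group < b->group ? -1 : 1;
  if (a->base != b->base)
    return a->base < b->base ? -1 : 1;
  if (a->init != b->init)
    return a->init < b->init ? -1 : 1;
  /* Break remaining ties by UID to keep the order deterministic.  */
  if (a->uid != b->uid)
    return a->uid < b->uid ? -1 : 1;
  return 0;
}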

/* If OP is the result of a conversion, return the unconverted value,
   otherwise return null.  */

static tree
strip_conversion (tree op)
{
  if (TREE_CODE (op) != SSA_NAME)
    return NULL_TREE;
  gimple *stmt = SSA_NAME_DEF_STMT (op);
  if (!is_gimple_assign (stmt)
      || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt)))
    return NULL_TREE;
  return gimple_assign_rhs1 (stmt);
}

/* Return true if vectorizable_* routines can handle statements STMT1_INFO
   and STMT2_INFO being in a single group.  When ALLOW_SLP_P, masked loads can
   be grouped in SLP mode.  */

static bool
can_group_stmts_p (stmt_vec_info stmt1_info, stmt_vec_info stmt2_info,
                   bool allow_slp_p)
{
  if (gimple_assign_single_p (stmt1_info->stmt))
    return gimple_assign_single_p (stmt2_info->stmt);

  gcall *call1 = dyn_cast <gcall *> (stmt1_info->stmt);
  if (call1 && gimple_call_internal_p (call1))
    {
      /* Check for two masked loads or two masked stores.  */
      gcall *call2 = dyn_cast <gcall *> (stmt2_info->stmt);
      if (!call2 || !gimple_call_internal_p (call2))
        return false;
      internal_fn ifn = gimple_call_internal_fn (call1);
      if (ifn != IFN_MASK_LOAD && ifn != IFN_MASK_STORE)
        return false;
      if (ifn != gimple_call_internal_fn (call2))
        return false;

      /* Check that the masks are the same.  Cope with casts of masks,
         like those created by build_mask_conversion.  */
      tree mask1 = gimple_call_arg (call1, 2);
      tree mask2 = gimple_call_arg (call2, 2);
      if (!operand_equal_p (mask1, mask2, 0)
          && (ifn == IFN_MASK_STORE || !allow_slp_p))
        {
          mask1 = strip_conversion (mask1);
          if (!mask1)
            return false;
          mask2 = strip_conversion (mask2);
          if (!mask2)
            return false;
          if (!operand_equal_p (mask1, mask2, 0))
            return false;
        }
      return true;
    }

  return false;
}
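
/* Illustrative example (not from the sources): two masked loads whose masks
   differ only by a conversion can still be grouped.  Given GIMPLE along the
   lines of

     _5 = (vector(4) <signed-boolean:32>) mask_3;
     x_6 = .MASK_LOAD (p_1, 32B, _5);
     y_7 = .MASK_LOAD (q_2, 32B, mask_3);

   strip_conversion reduces _5 back to mask_3, so both calls are recognized
   as using the same mask.  */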
3070 | |||||
3071 | /* Function vect_analyze_data_ref_accesses. | ||||
3072 | |||||
3073 | Analyze the access pattern of all the data references in the loop. | ||||
3074 | |||||
3075 | FORNOW: the only access pattern that is considered vectorizable is a | ||||
3076 | simple step 1 (consecutive) access. | ||||
3077 | |||||
3078 | FORNOW: handle only arrays and pointer accesses. */ | ||||
3079 | |||||
3080 | opt_result | ||||
3081 | vect_analyze_data_ref_accesses (vec_info *vinfo, | ||||
3082 | vec<int> *dataref_groups) | ||||
3083 | { | ||||
3084 | unsigned int i; | ||||
3085 | vec<data_reference_p> datarefs = vinfo->shared->datarefs; | ||||
3086 | |||||
3087 | DUMP_VECT_SCOPE ("vect_analyze_data_ref_accesses")auto_dump_scope scope ("vect_analyze_data_ref_accesses", vect_location ); | ||||
3088 | |||||
3089 | if (datarefs.is_empty ()) | ||||
3090 | return opt_result::success (); | ||||
3091 | |||||
3092 | /* Sort the array of datarefs to make building the interleaving chains | ||||
3093 | linear. Don't modify the original vector's order, it is needed for | ||||
3094 | determining what dependencies are reversed. */ | ||||
3095 | vec<dr_vec_info *> datarefs_copy; | ||||
3096 | datarefs_copy.create (datarefs.length ()); | ||||
3097 | for (unsigned i = 0; i < datarefs.length (); i++) | ||||
3098 | { | ||||
3099 | dr_vec_info *dr_info = vinfo->lookup_dr (datarefs[i]); | ||||
3100 | /* If the caller computed DR grouping use that, otherwise group by | ||||
3101 | basic blocks. */ | ||||
3102 | if (dataref_groups) | ||||
3103 | dr_info->group = (*dataref_groups)[i]; | ||||
3104 | else | ||||
3105 | dr_info->group = gimple_bb (DR_STMT (datarefs[i])(datarefs[i])->stmt)->index; | ||||
3106 | datarefs_copy.quick_push (dr_info); | ||||
3107 | } | ||||
3108 | datarefs_copy.qsort (dr_group_sort_cmp)qsort (dr_group_sort_cmp); | ||||
3109 | hash_set<stmt_vec_info> to_fixup; | ||||

  /* Build the interleaving chains.  */
  for (i = 0; i < datarefs_copy.length () - 1;)
    {
      dr_vec_info *dr_info_a = datarefs_copy[i];
      data_reference_p dra = dr_info_a->dr;
      int dra_group_id = dr_info_a->group;
      stmt_vec_info stmtinfo_a = dr_info_a->stmt;
      stmt_vec_info lastinfo = NULL;
      if (!STMT_VINFO_VECTORIZABLE (stmtinfo_a)
          || STMT_VINFO_GATHER_SCATTER_P (stmtinfo_a))
        {
          ++i;
          continue;
        }
      for (i = i + 1; i < datarefs_copy.length (); ++i)
        {
          dr_vec_info *dr_info_b = datarefs_copy[i];
          data_reference_p drb = dr_info_b->dr;
          int drb_group_id = dr_info_b->group;
          stmt_vec_info stmtinfo_b = dr_info_b->stmt;
          if (!STMT_VINFO_VECTORIZABLE (stmtinfo_b)
              || STMT_VINFO_GATHER_SCATTER_P (stmtinfo_b))
            break;

          /* ??? Imperfect sorting (non-compatible types, non-modulo
             accesses, same accesses) can lead to a group being artificially
             split here as we don't just skip over those.  If it really
             matters we can push those to a worklist and re-iterate
             over them.  Then we can just skip ahead to the next DR here.  */

          /* DRs in a different DR group should not be put into the same
             interleaving group.  */
          if (dra_group_id != drb_group_id)
            break;

          /* Check that the data-refs have the same first location (except
             init) and that they are both either stores or loads (not a load
             and a store, and not masked loads or stores).  */
          if (DR_IS_READ (dra) != DR_IS_READ (drb)
              || data_ref_compare_tree (DR_BASE_ADDRESS (dra),
                                        DR_BASE_ADDRESS (drb)) != 0
              || data_ref_compare_tree (DR_OFFSET (dra), DR_OFFSET (drb)) != 0
              || !can_group_stmts_p (stmtinfo_a, stmtinfo_b, true))
            break;

          /* Check that the data-refs have the same constant size.  */
          tree sza = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra)));
          tree szb = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb)));
          if (!tree_fits_uhwi_p (sza)
              || !tree_fits_uhwi_p (szb)
              || !tree_int_cst_equal (sza, szb))
            break;

          /* Check that the data-refs have the same step.  */
          if (data_ref_compare_tree (DR_STEP (dra), DR_STEP (drb)) != 0)
            break;

          /* Check the types are compatible.
             ??? We don't distinguish this during sorting.  */
          if (!types_compatible_p (TREE_TYPE (DR_REF (dra)),
                                   TREE_TYPE (DR_REF (drb))))
            break;

          /* Check that the DR_INITs are compile-time constants.  */
          if (!tree_fits_shwi_p (DR_INIT (dra))
              || !tree_fits_shwi_p (DR_INIT (drb)))
            break;

          /* Different .GOMP_SIMD_LANE calls still give the same lane,
             just hold extra information.  */
          if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmtinfo_a)
              && STMT_VINFO_SIMD_LANE_ACCESS_P (stmtinfo_b)
              && data_ref_compare_tree (DR_INIT (dra), DR_INIT (drb)) == 0)
            break;

          /* Sorting has ensured that DR_INIT (dra) <= DR_INIT (drb).  */
          HOST_WIDE_INT init_a = TREE_INT_CST_LOW (DR_INIT (dra));
          HOST_WIDE_INT init_b = TREE_INT_CST_LOW (DR_INIT (drb));
          HOST_WIDE_INT init_prev
            = TREE_INT_CST_LOW (DR_INIT (datarefs_copy[i-1]->dr));
          gcc_assert (init_a <= init_b
                      && init_a <= init_prev
                      && init_prev <= init_b);

          /* Do not place the same access in the interleaving chain twice.  */
          if (init_b == init_prev)
            {
              gcc_assert (gimple_uid (DR_STMT (datarefs_copy[i-1]->dr))
                          < gimple_uid (DR_STMT (drb)));
              /* Simply link in duplicates and fix up the chain below.  */
            }
          else
            {
              /* If init_b == init_a + the size of the type * k, we have an
                 interleaving, and DRA is accessed before DRB.  */
              unsigned HOST_WIDE_INT type_size_a = tree_to_uhwi (sza);
              if (type_size_a == 0
                  || (((unsigned HOST_WIDE_INT)init_b - init_a)
                      % type_size_a != 0))
                break;

              /* If we have a store, the accesses are adjacent.  This splits
                 groups into chunks we support (we don't support vectorization
                 of stores with gaps).  */
              if (!DR_IS_READ (dra)
                  && (((unsigned HOST_WIDE_INT)init_b - init_prev)
                      != type_size_a))
                break;

              /* If the step (if not zero or non-constant) is smaller than the
                 difference between data-refs' inits this splits groups into
                 suitable sizes.  */
              if (tree_fits_shwi_p (DR_STEP (dra)))
                {
                  unsigned HOST_WIDE_INT step
                    = absu_hwi (tree_to_shwi (DR_STEP (dra)));
                  if (step != 0
                      && step <= ((unsigned HOST_WIDE_INT)init_b - init_a))
                    break;
                }
            }

          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             DR_IS_READ (dra)
                             ? "Detected interleaving load %T and %T\n"
                             : "Detected interleaving store %T and %T\n",
                             DR_REF (dra), DR_REF (drb));

          /* Link the found element into the group list.  */
          if (!DR_GROUP_FIRST_ELEMENT (stmtinfo_a))
            {
              DR_GROUP_FIRST_ELEMENT (stmtinfo_a) = stmtinfo_a;
              lastinfo = stmtinfo_a;
            }
          DR_GROUP_FIRST_ELEMENT (stmtinfo_b) = stmtinfo_a;
          DR_GROUP_NEXT_ELEMENT (lastinfo) = stmtinfo_b;
          lastinfo = stmtinfo_b;

          STMT_VINFO_SLP_VECT_ONLY (stmtinfo_a)
            = !can_group_stmts_p (stmtinfo_a, stmtinfo_b, false);

          if (dump_enabled_p () && STMT_VINFO_SLP_VECT_ONLY (stmtinfo_a))
            dump_printf_loc (MSG_NOTE, vect_location,
                             "Load suitable for SLP vectorization only.\n");

          if (init_b == init_prev
              && !to_fixup.add (DR_GROUP_FIRST_ELEMENT (stmtinfo_a))
              && dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "Queuing group with duplicate access for fixup\n");
        }
    }
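
  /* Illustrative example (not part of the original sources): the loop
     above links accesses with a common base, offset, step and element
     size into one chain.  For a hypothetical loop

       for (int i = 0; i < n; i++)
         {
           x = a[2*i];      // DR_INIT 0, DR_STEP 8, size 4
           y = a[2*i + 1];  // DR_INIT 4, DR_STEP 8, size 4
         }

     both loads share base A, step 8 and size 4, and the difference of
     their DR_INITs (4 - 0) is a multiple of the element size, so they
     form a single interleaving group of size 2 headed by the first
     load.  */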

  /* Fixup groups with duplicate entries by splitting them.  */
  while (1)
    {
      hash_set<stmt_vec_info>::iterator it = to_fixup.begin ();
      if (!(it != to_fixup.end ()))
        break;
      stmt_vec_info grp = *it;
      to_fixup.remove (grp);

      /* Find the earliest duplicate group member.  */
      unsigned first_duplicate = -1U;
      stmt_vec_info next, g = grp;
      while ((next = DR_GROUP_NEXT_ELEMENT (g)))
        {
          if (tree_int_cst_equal (DR_INIT (STMT_VINFO_DR_INFO (next)->dr),
                                  DR_INIT (STMT_VINFO_DR_INFO (g)->dr))
              && gimple_uid (STMT_VINFO_STMT (next)) < first_duplicate)
            first_duplicate = gimple_uid (STMT_VINFO_STMT (next));
          g = next;
        }
      if (first_duplicate == -1U)
        continue;

      /* Then move all stmts after the first duplicate to a new group.
         Note this is a heuristic but one with the property that *it
         is fixed up completely.  */
      g = grp;
      stmt_vec_info newgroup = NULL, ng = grp;
      while ((next = DR_GROUP_NEXT_ELEMENT (g)))
        {
          if (gimple_uid (STMT_VINFO_STMT (next)) >= first_duplicate)
            {
              DR_GROUP_NEXT_ELEMENT (g) = DR_GROUP_NEXT_ELEMENT (next);
              if (!newgroup)
                newgroup = next;
              else
                DR_GROUP_NEXT_ELEMENT (ng) = next;
              ng = next;
              DR_GROUP_FIRST_ELEMENT (ng) = newgroup;
            }
          else
            g = DR_GROUP_NEXT_ELEMENT (g);
        }
      DR_GROUP_NEXT_ELEMENT (ng) = NULL;

      /* Fixup the new group which still may contain duplicates.  */
      to_fixup.add (newgroup);
    }
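
  /* Illustrative example (not part of the original sources): if a group
     contains the same DR_INIT twice, say loads at offsets 0, 4, 4, 8,
     the loop above locates the duplicate with the smallest statement
     UID and moves every group member whose UID is at least that UID
     into a new group (here the second access at offset 4 and, assuming
     it has a later UID, the access at offset 8).  The new group is
     queued again in case it still contains duplicates, so each pass
     leaves the original group fully fixed up.  */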

  dr_vec_info *dr_info;
  FOR_EACH_VEC_ELT (datarefs_copy, i, dr_info)
    {
      if (STMT_VINFO_VECTORIZABLE (dr_info->stmt)
          && !vect_analyze_data_ref_access (vinfo, dr_info))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "not vectorized: complicated access pattern.\n");

          if (is_a <bb_vec_info> (vinfo))
            {
              /* Mark the statement as not vectorizable.  */
              STMT_VINFO_VECTORIZABLE (dr_info->stmt) = false;
              continue;
            }
          else
            {
              datarefs_copy.release ();
              return opt_result::failure_at (dr_info->stmt->stmt,
                                             "not vectorized:"
                                             " complicated access pattern.\n");
            }
        }
    }

  datarefs_copy.release ();
  return opt_result::success ();
}

/* Function vect_vfa_segment_size.

   Input:
     DR_INFO: The data reference.
     LENGTH_FACTOR: segment length to consider.

   Return a value suitable for the dr_with_seg_len::seg_len field.
   This is the "distance travelled" by the pointer from the first
   iteration in the segment to the last.  Note that it does not include
   the size of the access; in effect it only describes the first byte.  */

static tree
vect_vfa_segment_size (dr_vec_info *dr_info, tree length_factor)
{
  length_factor = size_binop (MINUS_EXPR,
                              fold_convert (sizetype, length_factor),
                              size_one_node);
  return size_binop (MULT_EXPR, fold_convert (sizetype, DR_STEP (dr_info->dr)),
                     length_factor);
}
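
/* Worked example (illustrative, not from the original sources): with a
   DR_STEP of 4 bytes and a LENGTH_FACTOR of 8 iterations, the segment
   length is 4 * (8 - 1) == 28 bytes: the distance the pointer travels
   from the first byte of the first access to the first byte of the
   last access, exclusive of the access size itself.  */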

/* Return a value that, when added to abs (vect_vfa_segment_size (DR_INFO)),
   gives the worst-case number of bytes covered by the segment.  */

static unsigned HOST_WIDE_INT
vect_vfa_access_size (vec_info *vinfo, dr_vec_info *dr_info)
{
  stmt_vec_info stmt_vinfo = dr_info->stmt;
  tree ref_type = TREE_TYPE (DR_REF (dr_info->dr));
  unsigned HOST_WIDE_INT ref_size = tree_to_uhwi (TYPE_SIZE_UNIT (ref_type));
  unsigned HOST_WIDE_INT access_size = ref_size;
  if (DR_GROUP_FIRST_ELEMENT (stmt_vinfo))
    {
      gcc_assert (DR_GROUP_FIRST_ELEMENT (stmt_vinfo) == stmt_vinfo);
      access_size *= DR_GROUP_SIZE (stmt_vinfo) - DR_GROUP_GAP (stmt_vinfo);
    }
  tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
  int misalignment;
  if (STMT_VINFO_VEC_STMTS (stmt_vinfo).exists ()
      && ((misalignment = dr_misalignment (dr_info, vectype)), true)
      && (vect_supportable_dr_alignment (vinfo, dr_info, vectype, misalignment)
          == dr_explicit_realign_optimized))
    {
      /* We might access a full vector's worth.  */
      access_size += tree_to_uhwi (TYPE_SIZE_UNIT (vectype)) - ref_size;
    }
  return access_size;
}
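
/* Worked example (illustrative, not from the original sources): for a
   group of 4-byte accesses with DR_GROUP_SIZE 4 and DR_GROUP_GAP 1,
   the access size is 4 * (4 - 1) == 12 bytes per scalar iteration;
   under the explicit-realign-optimized scheme with a 16-byte vector
   type, another 16 - 4 bytes are added to cover a full vector load.  */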

/* Get the minimum alignment for all the scalar accesses that DR_INFO
   describes.  */

static unsigned int
vect_vfa_align (dr_vec_info *dr_info)
{
  return dr_alignment (dr_info->dr);
}

/* Function vect_compile_time_alias.

   Given data references A and B with equal base and offset, see whether
   the alias relation can be decided at compilation time.  Return 1 if
   it can and the references alias, 0 if it can and the references do
   not alias, and -1 if we cannot decide at compile time.  SEGMENT_LENGTH_A,
   SEGMENT_LENGTH_B, ACCESS_SIZE_A and ACCESS_SIZE_B are the equivalent
   of dr_with_seg_len::{seg_len,access_size} for A and B.  */

static int
vect_compile_time_alias (dr_vec_info *a, dr_vec_info *b,
                         tree segment_length_a, tree segment_length_b,
                         unsigned HOST_WIDE_INT access_size_a,
                         unsigned HOST_WIDE_INT access_size_b)
{
  poly_offset_int offset_a = wi::to_poly_offset (DR_INIT (a->dr));
  poly_offset_int offset_b = wi::to_poly_offset (DR_INIT (b->dr));
  poly_uint64 const_length_a;
  poly_uint64 const_length_b;

  /* For negative step, we need to adjust address range by TYPE_SIZE_UNIT
     bytes, e.g., int a[3] -> a[1] range is [a+4, a+16) instead of
     [a, a+12).  */
  if (tree_int_cst_compare (DR_STEP (a->dr), size_zero_node) < 0)
    {
      const_length_a = (-wi::to_poly_wide (segment_length_a)).force_uhwi ();
      offset_a -= const_length_a;
    }
  else
    const_length_a = tree_to_poly_uint64 (segment_length_a);
  if (tree_int_cst_compare (DR_STEP (b->dr), size_zero_node) < 0)
    {
      const_length_b = (-wi::to_poly_wide (segment_length_b)).force_uhwi ();
      offset_b -= const_length_b;
    }
  else
    const_length_b = tree_to_poly_uint64 (segment_length_b);

  const_length_a += access_size_a;
  const_length_b += access_size_b;

  if (ranges_known_overlap_p (offset_a, const_length_a,
                              offset_b, const_length_b))
    return 1;

  if (!ranges_maybe_overlap_p (offset_a, const_length_a,
                               offset_b, const_length_b))
    return 0;

  return -1;
}
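
/* Worked example (illustrative, not from the original sources): two
   accesses with DR_INITs 0 and 16, positive steps, segment lengths of
   12 bytes and access sizes of 4 bytes cover the byte ranges [0, 16)
   and [16, 32).  Those ranges cannot overlap, so the function returns
   0; had the second DR_INIT been 8, the ranges [0, 16) and [8, 24)
   would definitely overlap and it would return 1.  */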

/* Return true if the minimum nonzero dependence distance for loop LOOP_DEPTH
   in DDR is >= VF.  */

static bool
dependence_distance_ge_vf (data_dependence_relation *ddr,
                           unsigned int loop_depth, poly_uint64 vf)
{
  if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE
      || DDR_NUM_DIST_VECTS (ddr) == 0)
    return false;

  /* If the dependence is exact, we should have limited the VF instead.  */
  gcc_checking_assert (DDR_COULD_BE_INDEPENDENT_P (ddr));

  unsigned int i;
  lambda_vector dist_v;
  FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
    {
      HOST_WIDE_INT dist = dist_v[loop_depth];
      if (dist != 0
          && !(dist > 0 && DDR_REVERSED_P (ddr))
          && maybe_lt ((unsigned HOST_WIDE_INT) abs_hwi (dist), vf))
        return false;
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "dependence distance between %T and %T is >= VF\n",
                     DR_REF (DDR_A (ddr)), DR_REF (DDR_B (ddr)));

  return true;
}
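
/* Illustrative example (not part of the original sources): for a loop
   like

     for (int i = 0; i < n; i++)
       a[i + 8] = a[i] + 1;

   the recorded dependence distance is 8, so with a vectorization
   factor of 4 (or anything up to 8) the accesses of one vector
   iteration cannot interfere, and the caller can drop the runtime
   alias test for this pair.  */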

/* Dump LOWER_BOUND using flags DUMP_KIND.  Dumps are known to be enabled.  */

static void
dump_lower_bound (dump_flags_t dump_kind, const vec_lower_bound &lower_bound)
{
  dump_printf (dump_kind, "%s (%T) >= ",
               lower_bound.unsigned_p ? "unsigned" : "abs",
               lower_bound.expr);
  dump_dec (dump_kind, lower_bound.min_value);
}

/* Record that the vectorized loop requires the vec_lower_bound described
   by EXPR, UNSIGNED_P and MIN_VALUE.  */

static void
vect_check_lower_bound (loop_vec_info loop_vinfo, tree expr, bool unsigned_p,
                        poly_uint64 min_value)
{
  vec<vec_lower_bound> &lower_bounds
    = LOOP_VINFO_LOWER_BOUNDS (loop_vinfo);
  for (unsigned int i = 0; i < lower_bounds.length (); ++i)
    if (operand_equal_p (lower_bounds[i].expr, expr, 0))
      {
        unsigned_p &= lower_bounds[i].unsigned_p;
        min_value = upper_bound (lower_bounds[i].min_value, min_value);
        if (lower_bounds[i].unsigned_p != unsigned_p
            || maybe_lt (lower_bounds[i].min_value, min_value))
          {
            lower_bounds[i].unsigned_p = unsigned_p;
            lower_bounds[i].min_value = min_value;
            if (dump_enabled_p ())
              {
                dump_printf_loc (MSG_NOTE, vect_location,
                                 "updating run-time check to ");
                dump_lower_bound (MSG_NOTE, lower_bounds[i]);
                dump_printf (MSG_NOTE, "\n");
              }
          }
        return;
      }

  vec_lower_bound lower_bound (expr, unsigned_p, min_value);
  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "need a run-time check that ");
      dump_lower_bound (MSG_NOTE, lower_bound);
      dump_printf (MSG_NOTE, "\n");
    }
  LOOP_VINFO_LOWER_BOUNDS (loop_vinfo).safe_push (lower_bound);
}
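
/* Illustrative example (not part of the original sources): if a check
   "unsigned (STEP) >= 4" was already recorded and a new requirement
   "abs (STEP) >= 8" arrives for the same expression, the existing
   entry is updated in place to the weaker signedness (abs) and the
   larger minimum (8), so a single run-time test still covers both
   uses.  */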

/* Return true if it's unlikely that the step of the vectorized form of DR_INFO
   will span fewer than GAP bytes.  */

static bool
vect_small_gap_p (loop_vec_info loop_vinfo, dr_vec_info *dr_info,
                  poly_int64 gap)
{
  stmt_vec_info stmt_info = dr_info->stmt;
  HOST_WIDE_INT count
    = estimated_poly_value (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
  if (DR_GROUP_FIRST_ELEMENT (stmt_info))
    count *= DR_GROUP_SIZE (DR_GROUP_FIRST_ELEMENT (stmt_info));
  return (estimated_poly_value (gap)
          <= count * vect_get_scalar_dr_size (dr_info));
}
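
/* Worked example (illustrative, not from the original sources): with an
   estimated vectorization factor of 4, a group size of 2 and 4-byte
   scalar accesses, any GAP of at most 4 * 2 * 4 == 32 bytes counts as
   "small", i.e. the vector step is unlikely to span fewer bytes than
   the gap.  */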

/* Return true if we know that there is no alias between DR_INFO_A and
   DR_INFO_B when abs (DR_STEP (DR_INFO_A->dr)) >= N for some N.
   When returning true, set *LOWER_BOUND_OUT to this N.  */

static bool
vectorizable_with_step_bound_p (dr_vec_info *dr_info_a, dr_vec_info *dr_info_b,
                                poly_uint64 *lower_bound_out)
{
  /* Check that there is a constant gap of known sign between DR_A
     and DR_B.  */
  data_reference *dr_a = dr_info_a->dr;
  data_reference *dr_b = dr_info_b->dr;
  poly_int64 init_a, init_b;
  if (!operand_equal_p (DR_BASE_ADDRESS (dr_a), DR_BASE_ADDRESS (dr_b), 0)
      || !operand_equal_p (DR_OFFSET (dr_a), DR_OFFSET (dr_b), 0)
      || !operand_equal_p (DR_STEP (dr_a), DR_STEP (dr_b), 0)
      || !poly_int_tree_p (DR_INIT (dr_a), &init_a)
      || !poly_int_tree_p (DR_INIT (dr_b), &init_b)
      || !ordered_p (init_a, init_b))
    return false;

  /* Sort DR_A and DR_B by the address they access.  */
  if (maybe_lt (init_b, init_a))
    {
      std::swap (init_a, init_b);
      std::swap (dr_info_a, dr_info_b);
      std::swap (dr_a, dr_b);
    }

  /* If the two accesses could be dependent within a scalar iteration,
     make sure that we'd retain their order.  */
  if (maybe_gt (init_a + vect_get_scalar_dr_size (dr_info_a), init_b)
      && !vect_preserves_scalar_order_p (dr_info_a, dr_info_b))
    return false;

  /* There is no alias if abs (DR_STEP) is greater than or equal to
     the bytes spanned by the combination of the two accesses.  */
  *lower_bound_out = init_b + vect_get_scalar_dr_size (dr_info_b) - init_a;
  return true;
}
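
/* Illustrative example (not part of the original sources): for two
   accesses with the same base, offset and step, DR_INITs 0 and 4, and
   4-byte elements, the combined footprint per scalar iteration is
   4 + 4 - 0 == 8 bytes, so *LOWER_BOUND_OUT is set to 8: the accesses
   cannot alias across iterations whenever abs (DR_STEP) >= 8.  */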

/* Function vect_prune_runtime_alias_test_list.

   Prune a list of ddrs to be tested at run-time by versioning for alias.
   Merge several alias checks into one if possible.
   Return a failure result if the resulting list of ddrs is longer than
   allowed by PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS, otherwise return
   success.  */

opt_result
vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
{
  typedef pair_hash <tree_operand_hash, tree_operand_hash> tree_pair_hash;
  hash_set <tree_pair_hash> compared_objects;

  const vec<ddr_p> &may_alias_ddrs = LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
  vec<dr_with_seg_len_pair_t> &comp_alias_ddrs
    = LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo);
  const vec<vec_object_pair> &check_unequal_addrs
    = LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo);
  poly_uint64 vect_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  tree scalar_loop_iters = LOOP_VINFO_NITERS (loop_vinfo);

  ddr_p ddr;
  unsigned int i;
  tree length_factor;

  DUMP_VECT_SCOPE ("vect_prune_runtime_alias_test_list");

  /* Step values are irrelevant for aliasing if the number of vector
     iterations is equal to the number of scalar iterations (which can
     happen for fully-SLP loops).  */
  bool vf_one_p = known_eq (LOOP_VINFO_VECT_FACTOR (loop_vinfo), 1U);

  if (!vf_one_p)
    {
      /* Convert the checks for nonzero steps into bound tests.  */
      tree value;
      FOR_EACH_VEC_ELT (LOOP_VINFO_CHECK_NONZERO (loop_vinfo), i, value)
        vect_check_lower_bound (loop_vinfo, value, true, 1);
    }

  if (may_alias_ddrs.is_empty ())
    return opt_result::success ();

  comp_alias_ddrs.create (may_alias_ddrs.length ());

  unsigned int loop_depth
    = index_in_loop_nest (LOOP_VINFO_LOOP (loop_vinfo)->num,
                          LOOP_VINFO_LOOP_NEST (loop_vinfo));

  /* First, we collect all data ref pairs for aliasing checks.  */
  FOR_EACH_VEC_ELT (may_alias_ddrs, i, ddr)
    {
      poly_uint64 lower_bound;
      tree segment_length_a, segment_length_b;
      unsigned HOST_WIDE_INT access_size_a, access_size_b;
      unsigned int align_a, align_b;

      /* Ignore the alias if the VF we chose ended up being no greater
         than the dependence distance.  */
      if (dependence_distance_ge_vf (ddr, loop_depth, vect_factor))
        continue;

      if (DDR_OBJECT_A (ddr))
        {
          vec_object_pair new_pair (DDR_OBJECT_A (ddr), DDR_OBJECT_B (ddr));
          if (!compared_objects.add (new_pair))
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_NOTE, vect_location,
                                 "checking that %T and %T"
                                 " have different addresses\n",
                                 new_pair.first, new_pair.second);
              LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo).safe_push (new_pair);
            }
          continue;
        }

      dr_vec_info *dr_info_a = loop_vinfo->lookup_dr (DDR_A (ddr));
      stmt_vec_info stmt_info_a = dr_info_a->stmt;

      dr_vec_info *dr_info_b = loop_vinfo->lookup_dr (DDR_B (ddr));
      stmt_vec_info stmt_info_b = dr_info_b->stmt;

      bool preserves_scalar_order_p
        = vect_preserves_scalar_order_p (dr_info_a, dr_info_b);
      bool ignore_step_p
        = (vf_one_p
           && (preserves_scalar_order_p
               || operand_equal_p (DR_STEP (dr_info_a->dr),
                                   DR_STEP (dr_info_b->dr))));

      /* Skip the pair if inter-iteration dependencies are irrelevant
         and intra-iteration dependencies are guaranteed to be honored.  */
      if (ignore_step_p
          && (preserves_scalar_order_p
              || vectorizable_with_step_bound_p (dr_info_a, dr_info_b,
                                                 &lower_bound)))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "no need for alias check between "
                             "%T and %T when VF is 1\n",
                             DR_REF (dr_info_a->dr), DR_REF (dr_info_b->dr));
          continue;
        }

      /* See whether we can handle the alias using a bounds check on
         the step, and whether that's likely to be the best approach.
         (It might not be, for example, if the minimum step is much larger
         than the number of bytes handled by one vector iteration.)  */
      if (!ignore_step_p
          && TREE_CODE (DR_STEP (dr_info_a->dr)) != INTEGER_CST
          && vectorizable_with_step_bound_p (dr_info_a, dr_info_b,
                                             &lower_bound)
          && (vect_small_gap_p (loop_vinfo, dr_info_a, lower_bound)
              || vect_small_gap_p (loop_vinfo, dr_info_b, lower_bound)))
        {
          bool unsigned_p = dr_known_forward_stride_p (dr_info_a->dr);
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location, "no alias between "
                               "%T and %T when the step %T is outside ",
                               DR_REF (dr_info_a->dr),
                               DR_REF (dr_info_b->dr),
                               DR_STEP (dr_info_a->dr));
              if (unsigned_p)
                dump_printf (MSG_NOTE, "[0");
              else
                {
                  dump_printf (MSG_NOTE, "(");
                  dump_dec (MSG_NOTE, poly_int64 (-lower_bound));
                }
              dump_printf (MSG_NOTE, ", ");
              dump_dec (MSG_NOTE, lower_bound);
              dump_printf (MSG_NOTE, ")\n");
            }
          vect_check_lower_bound (loop_vinfo, DR_STEP (dr_info_a->dr),
                                  unsigned_p, lower_bound);
          continue;
        }

      stmt_vec_info dr_group_first_a = DR_GROUP_FIRST_ELEMENT (stmt_info_a);
      if (dr_group_first_a)
        {
          stmt_info_a = dr_group_first_a;
          dr_info_a = STMT_VINFO_DR_INFO (stmt_info_a);
        }

      stmt_vec_info dr_group_first_b = DR_GROUP_FIRST_ELEMENT (stmt_info_b);
      if (dr_group_first_b)
        {
          stmt_info_b = dr_group_first_b;
          dr_info_b = STMT_VINFO_DR_INFO (stmt_info_b);
        }

      if (ignore_step_p)
        {
          segment_length_a = size_zero_node;
          segment_length_b = size_zero_node;
        }
      else
        {
          if (!operand_equal_p (DR_STEP (dr_info_a->dr),
                                DR_STEP (dr_info_b->dr), 0))
            length_factor = scalar_loop_iters;
          else
            length_factor = size_int (vect_factor);
          segment_length_a = vect_vfa_segment_size (dr_info_a, length_factor);
          segment_length_b = vect_vfa_segment_size (dr_info_b, length_factor);
        }
      access_size_a = vect_vfa_access_size (loop_vinfo, dr_info_a);
      access_size_b = vect_vfa_access_size (loop_vinfo, dr_info_b);
      align_a = vect_vfa_align (dr_info_a);
      align_b = vect_vfa_align (dr_info_b);

      /* See whether the alias is known at compilation time.  */
      if (operand_equal_p (DR_BASE_ADDRESS (dr_info_a->dr),
                           DR_BASE_ADDRESS (dr_info_b->dr), 0)
          && operand_equal_p (DR_OFFSET (dr_info_a->dr),
                              DR_OFFSET (dr_info_b->dr), 0)
          && TREE_CODE (DR_STEP (dr_info_a->dr)) == INTEGER_CST
          && TREE_CODE (DR_STEP (dr_info_b->dr)) == INTEGER_CST
          && poly_int_tree_p (segment_length_a)
          && poly_int_tree_p (segment_length_b))
        {
          int res = vect_compile_time_alias (dr_info_a, dr_info_b,
                                             segment_length_a,
                                             segment_length_b,
                                             access_size_a,
                                             access_size_b);
          if (res >= 0 && dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location,
                               "can tell at compile time that %T and %T",
                               DR_REF (dr_info_a->dr), DR_REF (dr_info_b->dr));
              if (res == 0)
                dump_printf (MSG_NOTE, " do not alias\n");
              else
                dump_printf (MSG_NOTE, " alias\n");
            }

          if (res == 0)
            continue;

          if (res == 1)
            return opt_result::failure_at (stmt_info_b->stmt,
                                           "not vectorized:"
                                           " compilation time alias: %G%G",
                                           stmt_info_a->stmt,
                                           stmt_info_b->stmt);
        }

      dr_with_seg_len dr_a (dr_info_a->dr, segment_length_a,
                            access_size_a, align_a);
      dr_with_seg_len dr_b (dr_info_b->dr, segment_length_b,
                            access_size_b, align_b);
      /* Canonicalize the order to be the one that's needed for accurate
         RAW, WAR and WAW flags, in cases where the data references are
         well-ordered.  The order doesn't really matter otherwise,
         but we might as well be consistent.  */
      if (get_later_stmt (stmt_info_a, stmt_info_b) == stmt_info_a)
        std::swap (dr_a, dr_b);

      dr_with_seg_len_pair_t dr_with_seg_len_pair
        (dr_a, dr_b, (preserves_scalar_order_p
                      ? dr_with_seg_len_pair_t::WELL_ORDERED
                      : dr_with_seg_len_pair_t::REORDERED));

      comp_alias_ddrs.safe_push (dr_with_seg_len_pair);
    }

  prune_runtime_alias_test_list (&comp_alias_ddrs, vect_factor);

  unsigned int count = (comp_alias_ddrs.length ()
                        + check_unequal_addrs.length ());

  if (count
      && (loop_cost_model (LOOP_VINFO_LOOP (loop_vinfo))
          == VECT_COST_MODEL_VERY_CHEAP))
    return opt_result::failure_at
      (vect_location, "would need a runtime alias check\n");

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "improved number of alias checks from %d to %d\n",
                     may_alias_ddrs.length (), count);
  unsigned limit = param_vect_max_version_for_alias_checks;
  if (loop_cost_model (LOOP_VINFO_LOOP (loop_vinfo)) == VECT_COST_MODEL_CHEAP)
    limit = param_vect_max_version_for_alias_checks * 6 / 10;
  if (count > limit)
    return opt_result::failure_at
      (vect_location,
       "number of versioning for alias run-time tests exceeds %d "
       "(--param vect-max-version-for-alias-checks)\n", limit);

  return opt_result::success ();
}
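
/* Illustrative example (not part of the original sources): for

     void f (int *a, int *b, int n)
     {
       for (int i = 0; i < n; i++)
         a[i] = b[i] + 1;
     }

   nothing is known about A versus B at compile time, so the pair
   survives pruning and the loop is versioned: the vector copy runs
   under a run-time test that the accessed segments of A and B do not
   overlap, otherwise the scalar loop is used.  */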

/* Check whether we can use an internal function for a gather load
   or scatter store.  READ_P is true for loads and false for stores.
   MASKED_P is true if the load or store is conditional.  MEMORY_TYPE is
   the type of the memory elements being loaded or stored.  OFFSET_TYPE
   is the type of the offset that is being applied to the invariant
   base address.  SCALE is the amount by which the offset should
   be multiplied *after* it has been converted to address width.

   Return true if the function is supported, storing the function id in
   *IFN_OUT and the vector type for the offset in *OFFSET_VECTYPE_OUT.  */

bool
vect_gather_scatter_fn_p (vec_info *vinfo, bool read_p, bool masked_p,
                          tree vectype, tree memory_type, tree offset_type,
                          int scale, internal_fn *ifn_out,
                          tree *offset_vectype_out)
{
  unsigned int memory_bits = tree_to_uhwi (TYPE_SIZE (memory_type));
  unsigned int element_bits = vector_element_bits (vectype);
  if (element_bits != memory_bits)
    /* For now the vector elements must be the same width as the
       memory elements.  */
    return false;

  /* Work out which function we need.  */
  internal_fn ifn, alt_ifn;
  if (read_p)
    {
      ifn = masked_p ? IFN_MASK_GATHER_LOAD : IFN_GATHER_LOAD;
      alt_ifn = IFN_MASK_GATHER_LOAD;
    }
  else
    {
      ifn = masked_p ? IFN_MASK_SCATTER_STORE : IFN_SCATTER_STORE;
      alt_ifn = IFN_MASK_SCATTER_STORE;
    }

  for (;;)
    {
      tree offset_vectype = get_vectype_for_scalar_type (vinfo, offset_type);
      if (!offset_vectype)
        return false;

      /* Test whether the target supports this combination.  */
      if (internal_gather_scatter_fn_supported_p (ifn, vectype, memory_type,
                                                  offset_vectype, scale))
        {
          *ifn_out = ifn;
          *offset_vectype_out = offset_vectype;
          return true;
        }
      else if (!masked_p
               && internal_gather_scatter_fn_supported_p (alt_ifn, vectype,
                                                          memory_type,
                                                          offset_vectype,
                                                          scale))
        {
          *ifn_out = alt_ifn;
          *offset_vectype_out = offset_vectype;
          return true;
        }

      if (TYPE_PRECISION (offset_type) >= POINTER_SIZE
          && TYPE_PRECISION (offset_type) >= element_bits)
        return false;

      offset_type = build_nonstandard_integer_type
        (TYPE_PRECISION (offset_type) * 2, TYPE_UNSIGNED (offset_type));
    }
}
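
/* Illustrative example (not part of the original sources, widths are
   hypothetical): starting from a 16-bit OFFSET_TYPE, the loop above
   asks the target for a gather with a 16-bit offset vector; if that is
   unsupported it retries with 32 and then 64 bits, giving up once the
   offset is at least pointer-sized and at least as wide as the vector
   elements.  */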

/* STMT_INFO is a call to an internal gather load or scatter store function.
   Describe the operation in INFO.  */

static void
vect_describe_gather_scatter_call (stmt_vec_info stmt_info,
                                   gather_scatter_info *info)
{
  gcall *call = as_a <gcall *> (stmt_info->stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);

  info->ifn = gimple_call_internal_fn (call);
  info->decl = NULL_TREE;
  info->base = gimple_call_arg (call, 0);
  info->offset = gimple_call_arg (call, 1);
  info->offset_dt = vect_unknown_def_type;
  info->offset_vectype = NULL_TREE;
  info->scale = TREE_INT_CST_LOW (gimple_call_arg (call, 2));
  info->element_type = TREE_TYPE (vectype);
  info->memory_type = TREE_TYPE (DR_REF (dr));
}

/* Return true if a non-affine read or write in STMT_INFO is suitable for a
   gather load or scatter store.  Describe the operation in *INFO if so.  */

bool
vect_check_gather_scatter (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
                           gather_scatter_info *info)
{
  HOST_WIDE_INT scale = 1;
  poly_int64 pbitpos, pbitsize;
  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  tree offtype = NULL_TREE;
  tree decl = NULL_TREE, base, off;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree memory_type = TREE_TYPE (DR_REF (dr));
  machine_mode pmode;
  int punsignedp, reversep, pvolatilep = 0;
  internal_fn ifn;
  tree offset_vectype;
  bool masked_p = false;

  /* See whether this is already a call to a gather/scatter internal function.
     If not, see whether it's a masked load or store.  */
  gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
  if (call && gimple_call_internal_p (call))
    {
      ifn = gimple_call_internal_fn (call);
      if (internal_gather_scatter_fn_p (ifn))
        {
          vect_describe_gather_scatter_call (stmt_info, info);
          return true;
        }
      masked_p = (ifn == IFN_MASK_LOAD || ifn == IFN_MASK_STORE);
    }

  /* True if we should aim to use internal functions rather than
     built-in functions.  */
  bool use_ifn_p = (DR_IS_READ (dr)
                    ? supports_vec_gather_load_p (TYPE_MODE (vectype))
                    : supports_vec_scatter_store_p (TYPE_MODE (vectype)));

  base = DR_REF (dr);
  /* For masked loads/stores, DR_REF (dr) is an artificial MEM_REF,
     see if we can use the def stmt of the address.  */
  if (masked_p

    [analyzer path, event 9.1: 'masked_p' is false]
    [analyzer path, event 39: Assigned value is garbage or undefined]