File: build/gcc/modulo-sched.cc
Warning: line 1584, column 27: Called C++ object pointer is null
1 | /* Swing Modulo Scheduling implementation. | |||
2 | Copyright (C) 2004-2023 Free Software Foundation, Inc. | |||
3 | Contributed by Ayal Zaks and Mustafa Hagog <zaks,mustafa@il.ibm.com> | |||
4 | ||||
5 | This file is part of GCC. | |||
6 | ||||
7 | GCC is free software; you can redistribute it and/or modify it under | |||
8 | the terms of the GNU General Public License as published by the Free | |||
9 | Software Foundation; either version 3, or (at your option) any later | |||
10 | version. | |||
11 | ||||
12 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY | |||
13 | WARRANTY; without even the implied warranty of MERCHANTABILITY or | |||
14 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |||
15 | for more details. | |||
16 | ||||
17 | You should have received a copy of the GNU General Public License | |||
18 | along with GCC; see the file COPYING3. If not see | |||
19 | <http://www.gnu.org/licenses/>. */ | |||
20 | ||||
21 | ||||
22 | #include "config.h" | |||
23 | #include "system.h" | |||
24 | #include "coretypes.h" | |||
25 | #include "backend.h" | |||
26 | #include "target.h" | |||
27 | #include "rtl.h" | |||
28 | #include "tree.h" | |||
29 | #include "cfghooks.h" | |||
30 | #include "df.h" | |||
31 | #include "memmodel.h" | |||
32 | #include "optabs.h" | |||
33 | #include "regs.h" | |||
34 | #include "emit-rtl.h" | |||
35 | #include "gcov-io.h" | |||
36 | #include "profile.h" | |||
37 | #include "insn-attr.h" | |||
38 | #include "cfgrtl.h" | |||
39 | #include "sched-int.h" | |||
40 | #include "cfgloop.h" | |||
41 | #include "expr.h" | |||
42 | #include "ddg.h" | |||
43 | #include "tree-pass.h" | |||
44 | #include "dbgcnt.h" | |||
45 | #include "loop-unroll.h" | |||
46 | #include "hard-reg-set.h" | |||
47 | ||||
48 | #ifdef INSN_SCHEDULING | |||
49 | ||||
50 | /* This file contains the implementation of the Swing Modulo Scheduler, | |||
51 | described in the following references: | |||
52 | [1] J. Llosa, A. Gonzalez, E. Ayguade, M. Valero., and J. Eckhardt. | |||
53 | Lifetime-sensitive modulo scheduling in a production environment. | |||
54 | IEEE Trans. on Comps., 50(3), March 2001 | |||
55 | [2] J. Llosa, A. Gonzalez, E. Ayguade, and M. Valero. | |||
56 | Swing Modulo Scheduling: A Lifetime Sensitive Approach. | |||
57 | PACT '96 , pages 80-87, October 1996 (Boston - Massachusetts - USA). | |||
58 | ||||
59 | The basic structure is: | |||
60 | 1. Build a data-dependence graph (DDG) for each loop. | |||
61 | 2. Use the DDG to order the insns of a loop (not in topological order | |||
62 | necessarily, but rather) trying to place each insn after all its | |||
63 | predecessors _or_ after all its successors. | |||
64 | 3. Compute MII: a lower bound on the number of cycles to schedule the loop. | |||
65 | 4. Use the ordering to perform list-scheduling of the loop: | |||
66 | 1. Set II = MII. We will try to schedule the loop within II cycles. | |||
67 | 2. Try to schedule the insns one by one according to the ordering. | |||
68 | For each insn compute an interval of cycles by considering already- | |||
69 | scheduled preds and succs (and associated latencies); try to place | |||
70 | the insn in the cycles of this window checking for potential | |||
71 | resource conflicts (using the DFA interface). | |||
72 | Note: this is different from the cycle-scheduling of schedule_insns; | |||
73 | here the insns are not scheduled monotonically top-down (nor bottom- | |||
74 | up). | |||
75 | 3. If failed in scheduling all insns - bump II++ and try again, unless | |||
76 | II reaches an upper bound MaxII, in which case report failure. | |||
77 | 5. If we succeeded in scheduling the loop within II cycles, we now | |||
78 | generate prolog and epilog, decrease the counter of the loop, and | |||
79 | perform modulo variable expansion for live ranges that span more than | |||
80 | II cycles (i.e. use register copies to prevent a def from overwriting | |||
81 | itself before reaching the use). | |||
82 | ||||
83 | SMS works with countable loops (1) whose control part can be easily | |||
84 | decoupled from the rest of the loop and (2) whose loop count can | |||
85 | be easily adjusted. This is because we peel a constant number of | |||
86 | iterations into a prologue and epilogue for which we want to avoid | |||
87 | emitting the control part, and a kernel which is to iterate that | |||
88 | constant number of iterations less than the original loop. So the | |||
89 | control part should be a set of insns clearly identified and having | |||
90 | its own iv, not otherwise used in the loop (at least for now), which | |||
91 | initializes a register before the loop to the number of iterations. | |||
92 | Currently SMS relies on the do-loop pattern to recognize such loops, | |||
93 | where (1) the control part comprises all insns defining and/or | |||
94 | using a certain 'count' register and (2) the loop count can be | |||
95 | adjusted by modifying this register prior to the loop. | |||
96 | TODO: Rely on cfgloop analysis instead. */ | |||
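The II search in step 4 above reduces to the following minimal standalone sketch; sketch_try_schedule is a purely illustrative stand-in for the scheduling attempt, not part of the pass.

/* Illustrative only: find the smallest II in [MII, MaxII] for which all
   insns of the loop can be placed, as described in step 4 above.  */
#include <functional>

static int
sketch_find_ii (int mii, int max_ii,
                const std::function<bool (int)> &sketch_try_schedule)
{
  for (int ii = mii; ii <= max_ii; ii++)
    if (sketch_try_schedule (ii))  /* All insns fit within II cycles.  */
      return ii;
  return -1;                       /* Failure: leave the loop as is.  */
}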
97 | ||||
98 | /* This page defines partial-schedule structures and functions for | |||
99 | modulo scheduling. */ | |||
100 | ||||
101 | typedef struct partial_schedule *partial_schedule_ptr; | |||
102 | typedef struct ps_insn *ps_insn_ptr; | |||
103 | ||||
104 | /* The minimum (absolute) cycle that a node of ps was scheduled in. */ | |||
105 | #define PS_MIN_CYCLE(ps) (((partial_schedule_ptr)(ps))->min_cycle) | |||
106 | ||||
107 | /* The maximum (absolute) cycle that a node of ps was scheduled in. */ | |||
108 | #define PS_MAX_CYCLE(ps) (((partial_schedule_ptr)(ps))->max_cycle) | |||
109 | ||||
110 | /* Perform signed modulo, always returning a non-negative value. */ | |||
111 | #define SMODULO(x,y) ((x) % (y) < 0 ? ((x) % (y) + (y)) : (x) % (y)) | |||
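As a quick standalone illustration of SMODULO on the negative cycles that occur before normalization (values are illustrative):

/* Standalone check of SMODULO's behaviour for negative cycles.  */
static int
sketch_smodulo (int x, int y)
{
  return (x % y) < 0 ? (x % y) + y : (x % y);
}
/* In C++, -1 % 4 == -1, but sketch_smodulo (-1, 4) == 3, so an insn
   scheduled at absolute cycle -1 with ii = 4 lands in row 3.  */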
112 | ||||
113 | /* The number of different iterations the nodes in ps span, assuming | |||
114 | the stage boundaries are placed efficiently. */ | |||
115 | #define CALC_STAGE_COUNT(max_cycle,min_cycle,ii) ((max_cycle - min_cycle \ | |||
116 | + 1 + ii - 1) / ii) | |||
117 | /* The stage count of ps. */ | |||
118 | #define PS_STAGE_COUNT(ps) (((partial_schedule_ptr)(ps))->stage_count) | |||
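A worked instance of CALC_STAGE_COUNT with illustrative numbers:

/* With min_cycle = 0, max_cycle = 9 and ii = 4, the schedule spans
   (9 - 0 + 1 + 4 - 1) / 4 = 3 stages, i.e. ceil (10 / 4).  */
static int
sketch_stage_count (int max_cycle, int min_cycle, int ii)
{
  return (max_cycle - min_cycle + 1 + ii - 1) / ii;
}
/* sketch_stage_count (9, 0, 4) == 3.  */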
119 | ||||
120 | /* A single instruction in the partial schedule. */ | |||
121 | struct ps_insn | |||
122 | { | |||
123 | /* Identifies the instruction to be scheduled. Values smaller than | |||
124 | the ddg's num_nodes refer directly to ddg nodes. A value of | |||
125 | X - num_nodes refers to register move X. */ | |||
126 | int id; | |||
127 | ||||
128 | /* The (absolute) cycle in which the PS instruction is scheduled. | |||
129 | Same as SCHED_TIME (node). */ | |||
130 | int cycle; | |||
131 | ||||
132 | /* The next/prev PS_INSN in the same row. */ | |||
133 | ps_insn_ptr next_in_row, | |||
134 | prev_in_row; | |||
135 | ||||
136 | }; | |||
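The id encoding described in the struct above can be decoded as in this standalone sketch (num_nodes is whatever the loop's DDG reports; the helper is illustrative only):

/* Illustrative decoding of a ps_insn id: ids below num_nodes name DDG
   nodes directly, id X >= num_nodes names register move X - num_nodes
   (compare ps_reg_move and ps_rtl_insn further down).  */
struct sketch_decoded_id
{
  bool is_reg_move;
  int index;   /* DDG node index, or index into the reg_moves vector.  */
};

static sketch_decoded_id
sketch_decode_ps_id (int id, int num_nodes)
{
  if (id < num_nodes)
    return { false, id };
  return { true, id - num_nodes };
}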
137 | ||||
138 | /* Information about a register move that has been added to a partial | |||
139 | schedule. */ | |||
140 | struct ps_reg_move_info | |||
141 | { | |||
142 | /* The source of the move is defined by the ps_insn with id DEF. | |||
143 | The destination is used by the ps_insns with the ids in USES. */ | |||
144 | int def; | |||
145 | sbitmap uses; | |||
146 | ||||
147 | /* The original form of USES' instructions used OLD_REG, but they | |||
148 | should now use NEW_REG. */ | |||
149 | rtx old_reg; | |||
150 | rtx new_reg; | |||
151 | ||||
152 | /* The number of consecutive stages that the move occupies. */ | |||
153 | int num_consecutive_stages; | |||
154 | ||||
155 | /* An instruction that sets NEW_REG to the correct value. The first | |||
156 | move associated with DEF will have an rhs of OLD_REG; later moves | |||
157 | use the result of the previous move. */ | |||
158 | rtx_insn *insn; | |||
159 | }; | |||
160 | ||||
161 | /* Holds the partial schedule as an array of II rows. Each entry of the | |||
162 | array points to a linked list of PS_INSNs, which represents the | |||
163 | instructions that are scheduled for that row. */ | |||
164 | struct partial_schedule | |||
165 | { | |||
166 | int ii; /* Number of rows in the partial schedule. */ | |||
167 | int history; /* Threshold for conflict checking using DFA. */ | |||
168 | ||||
169 | /* rows[i] points to linked list of insns scheduled in row i (0<=i<ii). */ | |||
170 | ps_insn_ptr *rows; | |||
171 | ||||
172 | /* All the moves added for this partial schedule. Index X has | |||
173 | a ps_insn id of X + g->num_nodes. */ | |||
174 | vec<ps_reg_move_info> reg_moves; | |||
175 | ||||
176 | /* rows_length[i] holds the number of instructions in the row. | |||
177 | It is used only (as an optimization) to back off quickly from | |||
178 | trying to schedule a node in a full row; that is, to avoid running | |||
179 | through futile DFA state transitions. */ | |||
180 | int *rows_length; | |||
181 | ||||
182 | /* The earliest absolute cycle of an insn in the partial schedule. */ | |||
183 | int min_cycle; | |||
184 | ||||
185 | /* The latest absolute cycle of an insn in the partial schedule. */ | |||
186 | int max_cycle; | |||
187 | ||||
188 | ddg_ptr g; /* The DDG of the insns in the partial schedule. */ | |||
189 | ||||
190 | int stage_count; /* The stage count of the partial schedule. */ | |||
191 | }; | |||
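A minimal sketch of how the structure above is traversed: an insn scheduled at absolute cycle c with the current ii sits on row SMODULO (c, ii), and a full walk of the kernel visits each row's linked list in turn (this mirrors loops such as permute_partial_schedule below).

/* Illustrative traversal of a partial schedule, row by row.  */
static void
sketch_walk_partial_schedule (partial_schedule *ps, void (*visit) (ps_insn *))
{
  for (int row = 0; row < ps->ii; row++)
    for (ps_insn *p = ps->rows[row]; p; p = p->next_in_row)
      visit (p);
}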
192 | ||||
193 | ||||
194 | static partial_schedule_ptr create_partial_schedule (int ii, ddg_ptr, int history); | |||
195 | static void free_partial_schedule (partial_schedule_ptr); | |||
196 | static void reset_partial_schedule (partial_schedule_ptr, int new_ii); | |||
197 | void print_partial_schedule (partial_schedule_ptr, FILE *); | |||
198 | static void verify_partial_schedule (partial_schedule_ptr, sbitmap); | |||
199 | static ps_insn_ptr ps_add_node_check_conflicts (partial_schedule_ptr, | |||
200 | int, int, sbitmap, sbitmap); | |||
201 | static void rotate_partial_schedule (partial_schedule_ptr, int); | |||
202 | void set_row_column_for_ps (partial_schedule_ptr); | |||
203 | static void ps_insert_empty_row (partial_schedule_ptr, int, sbitmap); | |||
204 | static int compute_split_row (sbitmap, int, int, int, ddg_node_ptr); | |||
205 | ||||
206 | ||||
207 | /* This page defines constants and structures for the modulo scheduling | |||
208 | driver. */ | |||
209 | ||||
210 | static int sms_order_nodes (ddg_ptr, int, int *, int *); | |||
211 | static void set_node_sched_params (ddg_ptr); | |||
212 | static partial_schedule_ptr sms_schedule_by_order (ddg_ptr, int, int, int *); | |||
213 | static void permute_partial_schedule (partial_schedule_ptr, rtx_insn *); | |||
214 | static int calculate_stage_count (partial_schedule_ptr, int); | |||
215 | static void calculate_must_precede_follow (ddg_node_ptr, int, int, | |||
216 | int, int, sbitmap, sbitmap, sbitmap); | |||
217 | static int get_sched_window (partial_schedule_ptr, ddg_node_ptr, | |||
218 | sbitmap, int, int *, int *, int *); | |||
219 | static bool try_scheduling_node_in_cycle (partial_schedule_ptr, int, int, | |||
220 | sbitmap, int *, sbitmap, sbitmap); | |||
221 | static void remove_node_from_ps (partial_schedule_ptr, ps_insn_ptr); | |||
222 | ||||
223 | #define NODE_ASAP(node) ((node)->aux.count) | |||
224 | ||||
225 | #define SCHED_PARAMS(x) (&node_sched_param_vec[x]) | |||
226 | #define SCHED_TIME(x) (SCHED_PARAMS (x)->time) | |||
227 | #define SCHED_ROW(x) (SCHED_PARAMS (x)->row) | |||
228 | #define SCHED_STAGE(x) (SCHED_PARAMS (x)->stage) | |||
229 | #define SCHED_COLUMN(x) (SCHED_PARAMS (x)->column) | |||
230 | ||||
231 | /* The scheduling parameters held for each node. */ | |||
232 | typedef struct node_sched_params | |||
233 | { | |||
234 | int time; /* The absolute scheduling cycle. */ | |||
235 | ||||
236 | int row; /* Holds time % ii. */ | |||
237 | int stage; /* Holds time / ii. */ | |||
238 | ||||
239 | /* The column of a node inside the ps. If nodes u, v are on the same row, | |||
240 | u will precede v if column (u) < column (v). */ | |||
241 | int column; | |||
242 | } *node_sched_params_ptr; | |||
243 | ||||
244 | /* The following three functions are copied from the current scheduler | |||
245 | code in order to use sched_analyze() for computing the dependencies. | |||
246 | They are used when initializing the sched_info structure. */ | |||
247 | static const char * | |||
248 | sms_print_insn (const rtx_insn *insn, int aligned ATTRIBUTE_UNUSED__attribute__ ((__unused__))) | |||
249 | { | |||
250 | static char tmp[80]; | |||
251 | ||||
252 | sprintf (tmp, "i%4d", INSN_UID (insn)); | |||
253 | return tmp; | |||
254 | } | |||
255 | ||||
256 | static void | |||
257 | compute_jump_reg_dependencies (rtx insn ATTRIBUTE_UNUSED__attribute__ ((__unused__)), | |||
258 | regset used ATTRIBUTE_UNUSED__attribute__ ((__unused__))) | |||
259 | { | |||
260 | } | |||
261 | ||||
262 | static struct common_sched_info_def sms_common_sched_info; | |||
263 | ||||
264 | static struct sched_deps_info_def sms_sched_deps_info = | |||
265 | { | |||
266 | compute_jump_reg_dependencies, | |||
267 | NULLnullptr, NULLnullptr, NULLnullptr, NULLnullptr, NULLnullptr, NULLnullptr, NULLnullptr, NULLnullptr, NULLnullptr, NULLnullptr, | |||
268 | NULLnullptr, | |||
269 | 0, 0, 0 | |||
270 | }; | |||
271 | ||||
272 | static struct haifa_sched_info sms_sched_info = | |||
273 | { | |||
274 | NULLnullptr, | |||
275 | NULLnullptr, | |||
276 | NULLnullptr, | |||
277 | NULLnullptr, | |||
278 | NULLnullptr, | |||
279 | sms_print_insn, | |||
280 | NULLnullptr, | |||
281 | NULLnullptr, /* insn_finishes_block_p */ | |||
282 | NULLnullptr, NULLnullptr, | |||
283 | NULLnullptr, NULLnullptr, | |||
284 | 0, 0, | |||
285 | ||||
286 | NULLnullptr, NULLnullptr, NULLnullptr, NULLnullptr, | |||
287 | NULLnullptr, NULLnullptr, | |||
288 | 0 | |||
289 | }; | |||
290 | ||||
291 | /* Partial schedule instruction ID in PS is a register move. Return | |||
292 | information about it. */ | |||
293 | static struct ps_reg_move_info * | |||
294 | ps_reg_move (partial_schedule_ptr ps, int id) | |||
295 | { | |||
296 | gcc_checking_assert (id >= ps->g->num_nodes)((void)(!(id >= ps->g->num_nodes) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/modulo-sched.cc" , 296, __FUNCTION__), 0 : 0)); | |||
297 | return &ps->reg_moves[id - ps->g->num_nodes]; | |||
298 | } | |||
299 | ||||
300 | /* Return the rtl instruction that is being scheduled by partial schedule | |||
301 | instruction ID, which belongs to schedule PS. */ | |||
302 | static rtx_insn * | |||
303 | ps_rtl_insn (partial_schedule_ptr ps, int id) | |||
304 | { | |||
305 | if (id < ps->g->num_nodes) | |||
306 | return ps->g->nodes[id].insn; | |||
307 | else | |||
308 | return ps_reg_move (ps, id)->insn; | |||
309 | } | |||
310 | ||||
311 | /* Partial schedule instruction ID, which belongs to PS, occurred in | |||
312 | the original (unscheduled) loop. Return the first instruction | |||
313 | in the loop that was associated with ps_rtl_insn (PS, ID). | |||
314 | If the instruction had some notes before it, this is the first | |||
315 | of those notes. */ | |||
316 | static rtx_insn * | |||
317 | ps_first_note (partial_schedule_ptr ps, int id) | |||
318 | { | |||
319 | gcc_assert (id < ps->g->num_nodes)((void)(!(id < ps->g->num_nodes) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/modulo-sched.cc" , 319, __FUNCTION__), 0 : 0)); | |||
320 | return ps->g->nodes[id].first_note; | |||
321 | } | |||
322 | ||||
323 | /* Return the number of consecutive stages that are occupied by | |||
324 | partial schedule instruction ID in PS. */ | |||
325 | static int | |||
326 | ps_num_consecutive_stages (partial_schedule_ptr ps, int id) | |||
327 | { | |||
328 | if (id < ps->g->num_nodes) | |||
329 | return 1; | |||
330 | else | |||
331 | return ps_reg_move (ps, id)->num_consecutive_stages; | |||
332 | } | |||
333 | ||||
334 | /* Given HEAD and TAIL which are the first and last insns in a loop; | |||
335 | return the register which controls the loop. Return zero if it has | |||
336 | more than one occurrence in the loop besides the control part, or if the | |||
337 | do-loop pattern is not of the form we expect. */ | |||
338 | static rtx | |||
339 | doloop_register_get (rtx_insn *head, rtx_insn *tail) | |||
340 | { | |||
341 | rtx reg, condition; | |||
342 | rtx_insn *insn, *first_insn_not_to_check; | |||
343 | ||||
344 | if (!JUMP_P (tail)(((enum rtx_code) (tail)->code) == JUMP_INSN)) | |||
345 | return NULL_RTX(rtx) 0; | |||
346 | ||||
347 | if (!targetm.code_for_doloop_end) | |||
348 | return NULL_RTX(rtx) 0; | |||
349 | ||||
350 | /* TODO: Free SMS's dependence on doloop_condition_get. */ | |||
351 | condition = doloop_condition_get (tail); | |||
352 | if (! condition) | |||
353 | return NULL_RTX(rtx) 0; | |||
354 | ||||
355 | if (REG_P (XEXP (condition, 0))(((enum rtx_code) ((((condition)->u.fld[0]).rt_rtx))->code ) == REG)) | |||
356 | reg = XEXP (condition, 0)(((condition)->u.fld[0]).rt_rtx); | |||
357 | else if (GET_CODE (XEXP (condition, 0))((enum rtx_code) ((((condition)->u.fld[0]).rt_rtx))->code ) == PLUS | |||
358 | && REG_P (XEXP (XEXP (condition, 0), 0))(((enum rtx_code) (((((((condition)->u.fld[0]).rt_rtx))-> u.fld[0]).rt_rtx))->code) == REG)) | |||
359 | reg = XEXP (XEXP (condition, 0), 0)((((((condition)->u.fld[0]).rt_rtx))->u.fld[0]).rt_rtx); | |||
360 | else | |||
361 | gcc_unreachable ()(fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/modulo-sched.cc" , 361, __FUNCTION__)); | |||
362 | ||||
363 | /* Check that the COUNT_REG has no other occurrences in the loop | |||
364 | until the decrement. We assume the control part consists of | |||
365 | either a single (parallel) branch-on-count or a (non-parallel) | |||
366 | branch immediately preceded by a single (decrement) insn. */ | |||
367 | first_insn_not_to_check = (GET_CODE (PATTERN (tail))((enum rtx_code) (PATTERN (tail))->code) == PARALLEL ? tail | |||
368 | : prev_nondebug_insn (tail)); | |||
369 | ||||
370 | for (insn = head; insn != first_insn_not_to_check; insn = NEXT_INSN (insn)) | |||
371 | if (NONDEBUG_INSN_P (insn)((((enum rtx_code) (insn)->code) == INSN) || (((enum rtx_code ) (insn)->code) == JUMP_INSN) || (((enum rtx_code) (insn)-> code) == CALL_INSN)) && reg_mentioned_p (reg, insn)) | |||
372 | { | |||
373 | if (dump_file) | |||
374 | { | |||
375 | fprintf (dump_file, "SMS count_reg found "); | |||
376 | print_rtl_single (dump_file, reg); | |||
377 | fprintf (dump_file, " outside control in insn:\n"); | |||
378 | print_rtl_single (dump_file, insn); | |||
379 | } | |||
380 | ||||
381 | return NULL_RTX(rtx) 0; | |||
382 | } | |||
383 | ||||
384 | return reg; | |||
385 | } | |||
386 | ||||
387 | /* Check if COUNT_REG is set to a constant in the PRE_HEADER block, so | |||
388 | that the number of iterations is a compile-time constant. If so, | |||
389 | return the rtx_insn that sets COUNT_REG to a constant, and set COUNT to | |||
390 | this constant. Otherwise return 0. */ | |||
391 | static rtx_insn * | |||
392 | const_iteration_count (rtx count_reg, basic_block pre_header, | |||
393 | int64_t *count, bool* adjust_inplace) | |||
394 | { | |||
395 | rtx_insn *insn; | |||
396 | rtx_insn *head, *tail; | |||
397 | ||||
398 | *adjust_inplace = false; | |||
399 | bool read_after = false; | |||
400 | ||||
401 | if (! pre_header) | |||
402 | return NULLnullptr; | |||
403 | ||||
404 | get_ebb_head_tail (pre_header, pre_header, &head, &tail); | |||
405 | ||||
406 | for (insn = tail; insn != PREV_INSN (head); insn = PREV_INSN (insn)) | |||
407 | if (single_set (insn) && rtx_equal_p (count_reg, | |||
408 | SET_DEST (single_set (insn))(((single_set (insn))->u.fld[0]).rt_rtx))) | |||
409 | { | |||
410 | rtx pat = single_set (insn); | |||
411 | ||||
412 | if (CONST_INT_P (SET_SRC (pat))(((enum rtx_code) ((((pat)->u.fld[1]).rt_rtx))->code) == CONST_INT)) | |||
413 | { | |||
414 | *count = INTVAL (SET_SRC (pat))(((((pat)->u.fld[1]).rt_rtx))->u.hwint[0]); | |||
415 | *adjust_inplace = !read_after; | |||
416 | return insn; | |||
417 | } | |||
418 | ||||
419 | return NULLnullptr; | |||
420 | } | |||
421 | else if (NONDEBUG_INSN_P (insn)((((enum rtx_code) (insn)->code) == INSN) || (((enum rtx_code ) (insn)->code) == JUMP_INSN) || (((enum rtx_code) (insn)-> code) == CALL_INSN)) && reg_mentioned_p (count_reg, insn)) | |||
422 | { | |||
423 | read_after = true; | |||
424 | if (reg_set_p (count_reg, insn)) | |||
425 | break; | |||
426 | } | |||
427 | ||||
428 | return NULLnullptr; | |||
429 | } | |||
430 | ||||
431 | /* A very simple resource-based lower bound on the initiation interval. | |||
432 | ??? Improve the accuracy of this bound by considering the | |||
433 | utilization of various units. */ | |||
434 | static int | |||
435 | res_MII (ddg_ptr g) | |||
436 | { | |||
437 | if (targetm.sched.sms_res_mii) | |||
438 | return targetm.sched.sms_res_mii (g); | |||
439 | ||||
440 | return g->num_nodes / issue_rate; | |||
441 | } | |||
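A worked instance of the fallback bound (numbers are illustrative):

/* Example: a DDG with 12 nodes on a target that issues 4 insns per cycle
   gives a resource-based lower bound of 12 / 4 = 3 cycles per iteration.  */
static int
sketch_res_mii (int num_nodes, int issue_rate)
{
  return num_nodes / issue_rate;   /* 12 / 4 == 3 in the example.  */
}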
442 | ||||
443 | ||||
444 | /* A vector that contains the sched data for each ps_insn. */ | |||
445 | static vec<node_sched_params> node_sched_param_vec; | |||
446 | ||||
447 | /* Allocate sched_params for each node and initialize it. */ | |||
448 | static void | |||
449 | set_node_sched_params (ddg_ptr g) | |||
450 | { | |||
451 | node_sched_param_vec.truncate (0); | |||
452 | node_sched_param_vec.safe_grow_cleared (g->num_nodes, true); | |||
453 | } | |||
454 | ||||
455 | /* Make sure that node_sched_param_vec has an entry for every move in PS. */ | |||
456 | static void | |||
457 | extend_node_sched_params (partial_schedule_ptr ps) | |||
458 | { | |||
459 | node_sched_param_vec.safe_grow_cleared (ps->g->num_nodes | |||
460 | + ps->reg_moves.length (), true); | |||
461 | } | |||
462 | ||||
463 | /* Update the sched_params (time, row and stage) for node U using the II, | |||
464 | the CYCLE of U and MIN_CYCLE. | |||
465 | We're not simply taking the following | |||
466 | SCHED_STAGE (u) = CALC_STAGE_COUNT (SCHED_TIME (u), min_cycle, ii); | |||
467 | because the stages may not be aligned on cycle 0. */ | |||
468 | static void | |||
469 | update_node_sched_params (int u, int ii, int cycle, int min_cycle) | |||
470 | { | |||
471 | int sc_until_cycle_zero; | |||
472 | int stage; | |||
473 | ||||
474 | SCHED_TIME (u)((&node_sched_param_vec[u])->time) = cycle; | |||
475 | SCHED_ROW (u)((&node_sched_param_vec[u])->row) = SMODULO (cycle, ii)((cycle) % (ii) < 0 ? ((cycle) % (ii) + (ii)) : (cycle) % ( ii)); | |||
476 | ||||
477 | /* The calculation of stage count is done adding the number | |||
478 | of stages before cycle zero and after cycle zero. */ | |||
479 | sc_until_cycle_zero = CALC_STAGE_COUNT (-1, min_cycle, ii)((-1 - min_cycle + 1 + ii - 1) / ii); | |||
480 | ||||
481 | if (SCHED_TIME (u)((&node_sched_param_vec[u])->time) < 0) | |||
482 | { | |||
483 | stage = CALC_STAGE_COUNT (-1, SCHED_TIME (u), ii)((-1 - ((&node_sched_param_vec[u])->time) + 1 + ii - 1 ) / ii); | |||
484 | SCHED_STAGE (u)((&node_sched_param_vec[u])->stage) = sc_until_cycle_zero - stage; | |||
485 | } | |||
486 | else | |||
487 | { | |||
488 | stage = CALC_STAGE_COUNT (SCHED_TIME (u), 0, ii)((((&node_sched_param_vec[u])->time) - 0 + 1 + ii - 1) / ii); | |||
489 | SCHED_STAGE (u)((&node_sched_param_vec[u])->stage) = sc_until_cycle_zero + stage - 1; | |||
490 | } | |||
491 | } | |||
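A worked instance of the stage computation above, assuming ii = 4 and min_cycle = -5 (numbers are illustrative):

/* With ii = 4 and min_cycle = -5, stage boundaries are aligned on cycle 0,
   so cycles -8..-5 form one stage, -4..-1 the next, 0..3 the one after.
   sc_until_cycle_zero = (-1 - (-5) + 1 + 4 - 1) / 4 = 2.
     cycle -5: stage = (-1 - (-5) + 4) / 4 = 2, SCHED_STAGE = 2 - 2 = 0
     cycle  2: stage = (2 - 0 + 4) / 4 = 1,     SCHED_STAGE = 2 + 1 - 1 = 2
   The corresponding rows are SMODULO (-5, 4) = 3 and SMODULO (2, 4) = 2.  */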
492 | ||||
493 | static void | |||
494 | print_node_sched_params (FILE *file, int num_nodes, partial_schedule_ptr ps) | |||
495 | { | |||
496 | int i; | |||
497 | ||||
498 | if (! file) | |||
499 | return; | |||
500 | for (i = 0; i < num_nodes; i++) | |||
501 | { | |||
502 | node_sched_params_ptr nsp = SCHED_PARAMS (i)(&node_sched_param_vec[i]); | |||
503 | ||||
504 | fprintf (file, "Node = %d; INSN = %d\n", i, | |||
505 | INSN_UID (ps_rtl_insn (ps, i))); | |||
506 | fprintf (file, " asap = %d:\n", NODE_ASAP (&ps->g->nodes[i])((&ps->g->nodes[i])->aux.count)); | |||
507 | fprintf (file, " time = %d:\n", nsp->time); | |||
508 | fprintf (file, " stage = %d:\n", nsp->stage); | |||
509 | } | |||
510 | } | |||
511 | ||||
512 | /* Set SCHED_COLUMN for each instruction in row ROW of PS. */ | |||
513 | static void | |||
514 | set_columns_for_row (partial_schedule_ptr ps, int row) | |||
515 | { | |||
516 | ps_insn_ptr cur_insn; | |||
517 | int column; | |||
518 | ||||
519 | column = 0; | |||
520 | for (cur_insn = ps->rows[row]; cur_insn; cur_insn = cur_insn->next_in_row) | |||
521 | SCHED_COLUMN (cur_insn->id)((&node_sched_param_vec[cur_insn->id])->column) = column++; | |||
522 | } | |||
523 | ||||
524 | /* Set SCHED_COLUMN for each instruction in PS. */ | |||
525 | static void | |||
526 | set_columns_for_ps (partial_schedule_ptr ps) | |||
527 | { | |||
528 | int row; | |||
529 | ||||
530 | for (row = 0; row < ps->ii; row++) | |||
531 | set_columns_for_row (ps, row); | |||
532 | } | |||
533 | ||||
534 | /* Try to schedule the move with ps_insn identifier I_REG_MOVE in PS. | |||
535 | Its single predecessor has already been scheduled, as have its | |||
536 | ddg node successors. (The move may also have another move as its | |||
537 | successor, in which case that successor will be scheduled later.) | |||
538 | ||||
539 | The move is part of a chain that satisfies register dependencies | |||
540 | between a producing ddg node and various consuming ddg nodes. | |||
541 | If some of these dependencies have a distance of 1 (meaning that | |||
542 | the use is upward-exposed) then DISTANCE1_USES is nonnull and | |||
543 | contains the set of uses with distance-1 dependencies. | |||
544 | DISTANCE1_USES is null otherwise. | |||
545 | ||||
546 | MUST_FOLLOW is a scratch bitmap that is big enough to hold | |||
547 | all current ps_insn ids. | |||
548 | ||||
549 | Return true on success. */ | |||
550 | static bool | |||
551 | schedule_reg_move (partial_schedule_ptr ps, int i_reg_move, | |||
552 | sbitmap distance1_uses, sbitmap must_follow) | |||
553 | { | |||
554 | unsigned int u; | |||
555 | int this_time, this_distance, this_start, this_end, this_latency; | |||
556 | int start, end, c, ii; | |||
557 | sbitmap_iterator sbi; | |||
558 | ps_reg_move_info *move; | |||
559 | rtx_insn *this_insn; | |||
560 | ps_insn_ptr psi; | |||
561 | ||||
562 | move = ps_reg_move (ps, i_reg_move); | |||
563 | ii = ps->ii; | |||
564 | if (dump_file) | |||
565 | { | |||
566 | fprintf (dump_file, "Scheduling register move INSN %d; ii = %d" | |||
567 | ", min cycle = %d\n\n", INSN_UID (move->insn), ii, | |||
568 | PS_MIN_CYCLE (ps)(((partial_schedule_ptr)(ps))->min_cycle)); | |||
569 | print_rtl_single (dump_file, move->insn); | |||
570 | fprintf (dump_file, "\n%11s %11s %5s\n", "start", "end", "time"); | |||
571 | fprintf (dump_file, "=========== =========== =====\n"); | |||
572 | } | |||
573 | ||||
574 | start = INT_MIN(-2147483647 -1); | |||
575 | end = INT_MAX2147483647; | |||
576 | ||||
577 | /* For dependencies of distance 1 between a producer ddg node A | |||
578 | and consumer ddg node B, we have a chain of dependencies: | |||
579 | ||||
580 | A --(T,L1,1)--> M1 --(T,L2,0)--> M2 ... --(T,Ln,0)--> B | |||
581 | ||||
582 | where Mi is the ith move. For dependencies of distance 0 between | |||
583 | a producer ddg node A and consumer ddg node C, we have a chain of | |||
584 | dependencies: | |||
585 | ||||
586 | A --(T,L1',0)--> M1' --(T,L2',0)--> M2' ... --(T,Ln',0)--> C | |||
587 | ||||
588 | where Mi' occupies the same position as Mi but occurs a stage later. | |||
589 | We can only schedule each move once, so if we have both types of | |||
590 | chain, we model the second as: | |||
591 | ||||
592 | A --(T,L1',1)--> M1 --(T,L2',0)--> M2 ... --(T,Ln',-1)--> C | |||
593 | ||||
594 | First handle the dependencies between the previously-scheduled | |||
595 | predecessor and the move. */ | |||
596 | this_insn = ps_rtl_insn (ps, move->def); | |||
597 | this_latency = insn_latency (this_insn, move->insn); | |||
598 | this_distance = distance1_uses && move->def < ps->g->num_nodes ? 1 : 0; | |||
599 | this_time = SCHED_TIME (move->def)((&node_sched_param_vec[move->def])->time) - this_distance * ii; | |||
600 | this_start = this_time + this_latency; | |||
601 | this_end = this_time + ii; | |||
602 | if (dump_file) | |||
603 | fprintf (dump_file, "%11d %11d %5d %d --(T,%d,%d)--> %d\n", | |||
604 | this_start, this_end, SCHED_TIME (move->def)((&node_sched_param_vec[move->def])->time), | |||
605 | INSN_UID (this_insn), this_latency, this_distance, | |||
606 | INSN_UID (move->insn)); | |||
607 | ||||
608 | if (start < this_start) | |||
609 | start = this_start; | |||
610 | if (end > this_end) | |||
611 | end = this_end; | |||
612 | ||||
613 | /* Handle the dependencies between the move and previously-scheduled | |||
614 | successors. */ | |||
615 | EXECUTE_IF_SET_IN_BITMAP (move->uses, 0, u, sbi)for (bmp_iter_set_init (&(sbi), (move->uses), (0), & (u)); bmp_iter_set (&(sbi), &(u)); bmp_iter_next (& (sbi), &(u))) | |||
616 | { | |||
617 | this_insn = ps_rtl_insn (ps, u); | |||
618 | this_latency = insn_latency (move->insn, this_insn); | |||
619 | if (distance1_uses && !bitmap_bit_p (distance1_uses, u)) | |||
620 | this_distance = -1; | |||
621 | else | |||
622 | this_distance = 0; | |||
623 | this_time = SCHED_TIME (u)((&node_sched_param_vec[u])->time) + this_distance * ii; | |||
624 | this_start = this_time - ii; | |||
625 | this_end = this_time - this_latency; | |||
626 | if (dump_file) | |||
627 | fprintf (dump_file, "%11d %11d %5d %d --(T,%d,%d)--> %d\n", | |||
628 | this_start, this_end, SCHED_TIME (u)((&node_sched_param_vec[u])->time), INSN_UID (move->insn), | |||
629 | this_latency, this_distance, INSN_UID (this_insn)); | |||
630 | ||||
631 | if (start < this_start) | |||
632 | start = this_start; | |||
633 | if (end > this_end) | |||
634 | end = this_end; | |||
635 | } | |||
636 | ||||
637 | if (dump_file) | |||
638 | { | |||
639 | fprintf (dump_file, "----------- ----------- -----\n"); | |||
640 | fprintf (dump_file, "%11d %11d %5s %s\n", start, end, "", "(max, min)"); | |||
641 | } | |||
642 | ||||
643 | bitmap_clear (must_follow); | |||
644 | bitmap_set_bit (must_follow, move->def); | |||
645 | ||||
646 | start = MAX (start, end - (ii - 1))((start) > (end - (ii - 1)) ? (start) : (end - (ii - 1))); | |||
647 | for (c = end; c >= start; c--) | |||
648 | { | |||
649 | psi = ps_add_node_check_conflicts (ps, i_reg_move, c, | |||
650 | move->uses, must_follow); | |||
651 | if (psi) | |||
652 | { | |||
653 | update_node_sched_params (i_reg_move, ii, c, PS_MIN_CYCLE (ps)(((partial_schedule_ptr)(ps))->min_cycle)); | |||
654 | if (dump_file) | |||
655 | fprintf (dump_file, "\nScheduled register move INSN %d at" | |||
656 | " time %d, row %d\n\n", INSN_UID (move->insn), c, | |||
657 | SCHED_ROW (i_reg_move)((&node_sched_param_vec[i_reg_move])->row)); | |||
658 | return true; | |||
659 | } | |||
660 | } | |||
661 | ||||
662 | if (dump_file) | |||
663 | fprintf (dump_file, "\nNo available slot\n\n"); | |||
664 | ||||
665 | return false; | |||
666 | } | |||
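A worked numeric instance of the window computed in the function above (times and latencies are illustrative, ii = 4):

/* Example window for a register move with ii = 4:
     def scheduled at cycle 10, latency (def -> move) = 2, distance 0
       -> this_start = 10 + 2 = 12, this_end = 10 + 4 = 14
     one use scheduled at cycle 16, latency (move -> use) = 1, distance 0
       -> this_start = 16 - 4 = 12, this_end = 16 - 1 = 15
   giving start = 12, end = 14; after start = MAX (start, end - (ii - 1))
   the move is tried at cycles 14, 13, 12, from end downwards.  */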
667 | ||||
668 | /* | |||
669 | Breaking intra-loop register anti-dependences: | |||
670 | Each intra-loop register anti-dependence implies a cross-iteration true | |||
671 | dependence of distance 1. Therefore, we can remove such false dependencies | |||
672 | and figure out if the partial schedule broke them by checking if (for a | |||
673 | true-dependence of distance 1): SCHED_TIME (def) < SCHED_TIME (use) and | |||
674 | if so generate a register move. The number of such moves is equal to: | |||
675 | nreg_moves = (SCHED_TIME (use) - SCHED_TIME (def)) / ii + 1 - d, | |||
676 | where d = 0 if the dependence was broken by the partial schedule | |||
677 | and d = 1 if not. | |||
678 | */ | |||
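A worked instance of the formula above (cycle numbers are illustrative):

/* With ii = 4, SCHED_TIME (def) = 2 and SCHED_TIME (use) = 11 for a
   broken (distance 1) dependence:
     nreg_moves = (11 - 2) / 4 + 1 - 0 = 3
   i.e. the value defined at cycle 2 must be carried through three register
   moves so that redefinitions in later kernel iterations do not clobber it
   before the use at cycle 11 reads it.  */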
679 | static bool | |||
680 | schedule_reg_moves (partial_schedule_ptr ps) | |||
681 | { | |||
682 | ddg_ptr g = ps->g; | |||
683 | int ii = ps->ii; | |||
684 | int i; | |||
685 | ||||
686 | for (i = 0; i < g->num_nodes; i++) | |||
687 | { | |||
688 | ddg_node_ptr u = &g->nodes[i]; | |||
689 | ddg_edge_ptr e; | |||
690 | int nreg_moves = 0, i_reg_move; | |||
691 | rtx prev_reg, old_reg; | |||
692 | int first_move; | |||
693 | int distances[2]; | |||
694 | sbitmap distance1_uses; | |||
695 | rtx set = single_set (u->insn); | |||
696 | ||||
697 | /* Skip instructions that do not set a register. */ | |||
698 | if (set && !REG_P (SET_DEST (set))(((enum rtx_code) ((((set)->u.fld[0]).rt_rtx))->code) == REG)) | |||
699 | continue; | |||
700 | ||||
701 | /* Compute the number of reg_moves needed for u, by looking at life | |||
702 | ranges started at u (excluding self-loops). */ | |||
703 | distances[0] = distances[1] = false; | |||
704 | for (e = u->out; e; e = e->next_out) | |||
705 | if (e->type == TRUE_DEP && e->dest != e->src) | |||
706 | { | |||
707 | int nreg_moves4e = (SCHED_TIME (e->dest->cuid)((&node_sched_param_vec[e->dest->cuid])->time) | |||
708 | - SCHED_TIME (e->src->cuid)((&node_sched_param_vec[e->src->cuid])->time)) / ii; | |||
709 | ||||
710 | if (e->distance == 1) | |||
711 | nreg_moves4e = (SCHED_TIME (e->dest->cuid)((&node_sched_param_vec[e->dest->cuid])->time) | |||
712 | - SCHED_TIME (e->src->cuid)((&node_sched_param_vec[e->src->cuid])->time) + ii) / ii; | |||
713 | ||||
714 | /* If dest precedes src in the schedule of the kernel, then dest | |||
715 | will read before src writes and we can save one reg_copy. */ | |||
716 | if (SCHED_ROW (e->dest->cuid)((&node_sched_param_vec[e->dest->cuid])->row) == SCHED_ROW (e->src->cuid)((&node_sched_param_vec[e->src->cuid])->row) | |||
717 | && SCHED_COLUMN (e->dest->cuid)((&node_sched_param_vec[e->dest->cuid])->column) < SCHED_COLUMN (e->src->cuid)((&node_sched_param_vec[e->src->cuid])->column)) | |||
718 | nreg_moves4e--; | |||
719 | ||||
720 | if (nreg_moves4e >= 1) | |||
721 | { | |||
722 | /* !single_set instructions are not supported yet and | |||
723 | thus we do not expect to encounter them in the loop | |||
724 | except in the doloop part. For the latter case | |||
725 | we assume no regmoves are generated as the doloop | |||
726 | instructions are tied to the branch with an edge. */ | |||
727 | gcc_assert (set)((void)(!(set) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/modulo-sched.cc" , 727, __FUNCTION__), 0 : 0)); | |||
728 | /* If the instruction contains an auto-inc register then | |||
729 | validate that the regmove is being generated for the | |||
730 | target register rather than the inc'ed register. */ | |||
731 | gcc_assert (!autoinc_var_is_used_p (u->insn, e->dest->insn))((void)(!(!autoinc_var_is_used_p (u->insn, e->dest-> insn)) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/modulo-sched.cc" , 731, __FUNCTION__), 0 : 0)); | |||
732 | } | |||
733 | ||||
734 | if (nreg_moves4e) | |||
735 | { | |||
736 | gcc_assert (e->distance < 2)((void)(!(e->distance < 2) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/modulo-sched.cc" , 736, __FUNCTION__), 0 : 0)); | |||
737 | distances[e->distance] = true; | |||
738 | } | |||
739 | nreg_moves = MAX (nreg_moves, nreg_moves4e)((nreg_moves) > (nreg_moves4e) ? (nreg_moves) : (nreg_moves4e )); | |||
740 | } | |||
741 | ||||
742 | if (nreg_moves == 0) | |||
743 | continue; | |||
744 | ||||
745 | /* Create NREG_MOVES register moves. */ | |||
746 | first_move = ps->reg_moves.length (); | |||
747 | ps->reg_moves.safe_grow_cleared (first_move + nreg_moves, true); | |||
748 | extend_node_sched_params (ps); | |||
749 | ||||
750 | /* Record the moves associated with this node. */ | |||
751 | first_move += ps->g->num_nodes; | |||
752 | ||||
753 | /* Generate each move. */ | |||
754 | old_reg = prev_reg = SET_DEST (set)(((set)->u.fld[0]).rt_rtx); | |||
755 | if (HARD_REGISTER_P (old_reg)((((rhs_regno(old_reg))) < 76))) | |||
756 | return false; | |||
757 | ||||
758 | for (i_reg_move = 0; i_reg_move < nreg_moves; i_reg_move++) | |||
759 | { | |||
760 | ps_reg_move_info *move = ps_reg_move (ps, first_move + i_reg_move); | |||
761 | ||||
762 | move->def = i_reg_move > 0 ? first_move + i_reg_move - 1 : i; | |||
763 | move->uses = sbitmap_alloc (first_move + nreg_moves); | |||
764 | move->old_reg = old_reg; | |||
765 | move->new_reg = gen_reg_rtx (GET_MODE (prev_reg)((machine_mode) (prev_reg)->mode)); | |||
766 | move->num_consecutive_stages = distances[0] && distances[1] ? 2 : 1; | |||
767 | move->insn = gen_move_insn (move->new_reg, copy_rtx (prev_reg)); | |||
768 | bitmap_clear (move->uses); | |||
769 | ||||
770 | prev_reg = move->new_reg; | |||
771 | } | |||
772 | ||||
773 | distance1_uses = distances[1] ? sbitmap_alloc (g->num_nodes) : NULLnullptr; | |||
774 | ||||
775 | if (distance1_uses) | |||
776 | bitmap_clear (distance1_uses); | |||
777 | ||||
778 | /* Every use of the register defined by node may require a different | |||
779 | copy of this register, depending on the time the use is scheduled. | |||
780 | Record which uses require which move results. */ | |||
781 | for (e = u->out; e; e = e->next_out) | |||
782 | if (e->type == TRUE_DEP && e->dest != e->src) | |||
783 | { | |||
784 | int dest_copy = (SCHED_TIME (e->dest->cuid)((&node_sched_param_vec[e->dest->cuid])->time) | |||
785 | - SCHED_TIME (e->src->cuid)((&node_sched_param_vec[e->src->cuid])->time)) / ii; | |||
786 | ||||
787 | if (e->distance == 1) | |||
788 | dest_copy = (SCHED_TIME (e->dest->cuid)((&node_sched_param_vec[e->dest->cuid])->time) | |||
789 | - SCHED_TIME (e->src->cuid)((&node_sched_param_vec[e->src->cuid])->time) + ii) / ii; | |||
790 | ||||
791 | if (SCHED_ROW (e->dest->cuid)((&node_sched_param_vec[e->dest->cuid])->row) == SCHED_ROW (e->src->cuid)((&node_sched_param_vec[e->src->cuid])->row) | |||
792 | && SCHED_COLUMN (e->dest->cuid)((&node_sched_param_vec[e->dest->cuid])->column) < SCHED_COLUMN (e->src->cuid)((&node_sched_param_vec[e->src->cuid])->column)) | |||
793 | dest_copy--; | |||
794 | ||||
795 | if (dest_copy) | |||
796 | { | |||
797 | ps_reg_move_info *move; | |||
798 | ||||
799 | move = ps_reg_move (ps, first_move + dest_copy - 1); | |||
800 | bitmap_set_bit (move->uses, e->dest->cuid); | |||
801 | if (e->distance == 1) | |||
802 | bitmap_set_bit (distance1_uses, e->dest->cuid); | |||
803 | } | |||
804 | } | |||
805 | ||||
806 | auto_sbitmap must_follow (first_move + nreg_moves); | |||
807 | for (i_reg_move = 0; i_reg_move < nreg_moves; i_reg_move++) | |||
808 | if (!schedule_reg_move (ps, first_move + i_reg_move, | |||
809 | distance1_uses, must_follow)) | |||
810 | break; | |||
811 | if (distance1_uses) | |||
812 | sbitmap_free (distance1_uses); | |||
813 | if (i_reg_move < nreg_moves) | |||
814 | return false; | |||
815 | } | |||
816 | return true; | |||
817 | } | |||
818 | ||||
819 | /* Emit the moves associated with PS. Apply the substitutions | |||
820 | associated with them. */ | |||
821 | static void | |||
822 | apply_reg_moves (partial_schedule_ptr ps) | |||
823 | { | |||
824 | ps_reg_move_info *move; | |||
825 | int i; | |||
826 | ||||
827 | FOR_EACH_VEC_ELT (ps->reg_moves, i, move)for (i = 0; (ps->reg_moves).iterate ((i), &(move)); ++ (i)) | |||
828 | { | |||
829 | unsigned int i_use; | |||
830 | sbitmap_iterator sbi; | |||
831 | ||||
832 | EXECUTE_IF_SET_IN_BITMAP (move->uses, 0, i_use, sbi)for (bmp_iter_set_init (&(sbi), (move->uses), (0), & (i_use)); bmp_iter_set (&(sbi), &(i_use)); bmp_iter_next (&(sbi), &(i_use))) | |||
833 | { | |||
834 | replace_rtx (ps->g->nodes[i_use].insn, move->old_reg, move->new_reg); | |||
835 | df_insn_rescan (ps->g->nodes[i_use].insn); | |||
836 | } | |||
837 | } | |||
838 | } | |||
839 | ||||
840 | /* Bump the SCHED_TIMEs of all nodes by AMOUNT. Set the values of | |||
841 | SCHED_ROW and SCHED_STAGE. Instruction scheduled on cycle AMOUNT | |||
842 | will move to cycle zero. */ | |||
843 | static void | |||
844 | reset_sched_times (partial_schedule_ptr ps, int amount) | |||
845 | { | |||
846 | int row; | |||
847 | int ii = ps->ii; | |||
848 | ps_insn_ptr crr_insn; | |||
849 | ||||
850 | for (row = 0; row < ii; row++) | |||
851 | for (crr_insn = ps->rows[row]; crr_insn; crr_insn = crr_insn->next_in_row) | |||
852 | { | |||
853 | int u = crr_insn->id; | |||
854 | int normalized_time = SCHED_TIME (u)((&node_sched_param_vec[u])->time) - amount; | |||
855 | int new_min_cycle = PS_MIN_CYCLE (ps)(((partial_schedule_ptr)(ps))->min_cycle) - amount; | |||
856 | ||||
857 | if (dump_file) | |||
858 | { | |||
859 | /* Print the scheduling times after the rotation. */ | |||
860 | rtx_insn *insn = ps_rtl_insn (ps, u); | |||
861 | ||||
862 | fprintf (dump_file, "crr_insn->node=%d (insn id %d), " | |||
863 | "crr_insn->cycle=%d, min_cycle=%d", u, | |||
864 | INSN_UID (insn), normalized_time, new_min_cycle); | |||
865 | if (JUMP_P (insn)(((enum rtx_code) (insn)->code) == JUMP_INSN)) | |||
866 | fprintf (dump_file, " (branch)"); | |||
867 | fprintf (dump_file, "\n"); | |||
868 | } | |||
869 | ||||
870 | gcc_assert (SCHED_TIME (u) >= ps->min_cycle)((void)(!(((&node_sched_param_vec[u])->time) >= ps-> min_cycle) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/modulo-sched.cc" , 870, __FUNCTION__), 0 : 0)); | |||
871 | gcc_assert (SCHED_TIME (u) <= ps->max_cycle)((void)(!(((&node_sched_param_vec[u])->time) <= ps-> max_cycle) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/modulo-sched.cc" , 871, __FUNCTION__), 0 : 0)); | |||
872 | ||||
873 | crr_insn->cycle = normalized_time; | |||
874 | update_node_sched_params (u, ii, normalized_time, new_min_cycle); | |||
875 | } | |||
876 | } | |||
877 | ||||
878 | /* Permute the insns according to their order in PS, from row 0 to | |||
879 | row ii-1, and position them right before LAST. This schedules | |||
880 | the insns of the loop kernel. */ | |||
881 | static void | |||
882 | permute_partial_schedule (partial_schedule_ptr ps, rtx_insn *last) | |||
883 | { | |||
884 | int ii = ps->ii; | |||
885 | int row; | |||
886 | ps_insn_ptr ps_ij; | |||
887 | ||||
888 | for (row = 0; row < ii ; row++) | |||
889 | for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row) | |||
890 | { | |||
891 | rtx_insn *insn = ps_rtl_insn (ps, ps_ij->id); | |||
892 | ||||
893 | if (PREV_INSN (last) != insn) | |||
894 | { | |||
895 | if (ps_ij->id < ps->g->num_nodes) | |||
896 | reorder_insns_nobb (ps_first_note (ps, ps_ij->id), insn, | |||
897 | PREV_INSN (last)); | |||
898 | else | |||
899 | add_insn_before (insn, last, NULLnullptr); | |||
900 | } | |||
901 | } | |||
902 | } | |||
903 | ||||
904 | /* Set bitmaps TMP_FOLLOW and TMP_PRECEDE to MUST_FOLLOW and MUST_PRECEDE | |||
905 | respectively only if cycle C falls on the border of the scheduling | |||
906 | window boundaries marked by START and END cycles. STEP is the | |||
907 | direction of the window. */ | |||
908 | static inline void | |||
909 | set_must_precede_follow (sbitmap *tmp_follow, sbitmap must_follow, | |||
910 | sbitmap *tmp_precede, sbitmap must_precede, int c, | |||
911 | int start, int end, int step) | |||
912 | { | |||
913 | *tmp_precede = NULLnullptr; | |||
914 | *tmp_follow = NULLnullptr; | |||
915 | ||||
916 | if (c == start) | |||
917 | { | |||
918 | if (step == 1) | |||
919 | *tmp_precede = must_precede; | |||
920 | else /* step == -1. */ | |||
921 | *tmp_follow = must_follow; | |||
922 | } | |||
923 | if (c == end - step) | |||
924 | { | |||
925 | if (step == 1) | |||
926 | *tmp_follow = must_follow; | |||
927 | else /* step == -1. */ | |||
928 | *tmp_precede = must_precede; | |||
929 | } | |||
930 | ||||
931 | } | |||
932 | ||||
933 | /* Return True if the branch can be moved to row ii-1 while | |||
934 | normalizing the partial schedule PS to start from cycle zero and thus | |||
935 | optimize the SC. Otherwise return False. */ | |||
936 | static bool | |||
937 | optimize_sc (partial_schedule_ptr ps, ddg_ptr g) | |||
938 | { | |||
939 | int amount = PS_MIN_CYCLE (ps)(((partial_schedule_ptr)(ps))->min_cycle); | |||
940 | int start, end, step; | |||
941 | int ii = ps->ii; | |||
942 | bool ok = false; | |||
943 | int stage_count, stage_count_curr; | |||
944 | ||||
945 | /* Compare the SC after normalization and SC after bringing the branch | |||
946 | to row ii-1. If they are equal just bail out. */ | |||
947 | stage_count = calculate_stage_count (ps, amount); | |||
948 | stage_count_curr = | |||
949 | calculate_stage_count (ps, SCHED_TIME (g->closing_branch->cuid)((&node_sched_param_vec[g->closing_branch->cuid])-> time) - (ii - 1)); | |||
950 | ||||
951 | if (stage_count == stage_count_curr) | |||
952 | { | |||
953 | if (dump_file) | |||
954 | fprintf (dump_file, "SMS SC already optimized.\n"); | |||
955 | ||||
956 | return false; | |||
957 | } | |||
958 | ||||
959 | if (dump_file) | |||
960 | { | |||
961 | fprintf (dump_file, "SMS Trying to optimize branch location\n"); | |||
962 | fprintf (dump_file, "SMS partial schedule before trial:\n"); | |||
963 | print_partial_schedule (ps, dump_file); | |||
964 | } | |||
965 | ||||
966 | /* First, normalize the partial scheduling. */ | |||
967 | reset_sched_times (ps, amount); | |||
968 | rotate_partial_schedule (ps, amount); | |||
969 | if (dump_file) | |||
970 | { | |||
971 | fprintf (dump_file, | |||
972 | "SMS partial schedule after normalization (ii, %d, SC %d):\n", | |||
973 | ii, stage_count); | |||
974 | print_partial_schedule (ps, dump_file); | |||
975 | } | |||
976 | ||||
977 | if (SMODULO (SCHED_TIME (g->closing_branch->cuid), ii)((((&node_sched_param_vec[g->closing_branch->cuid]) ->time)) % (ii) < 0 ? ((((&node_sched_param_vec[g-> closing_branch->cuid])->time)) % (ii) + (ii)) : (((& node_sched_param_vec[g->closing_branch->cuid])->time )) % (ii)) == ii - 1) | |||
978 | return true; | |||
979 | ||||
980 | auto_sbitmap sched_nodes (g->num_nodes); | |||
981 | bitmap_ones (sched_nodes); | |||
982 | ||||
983 | /* Calculate the new placement of the branch. It should be in row | |||
984 | ii-1 and fall into its scheduling window. */ | |||
985 | if (get_sched_window (ps, g->closing_branch, sched_nodes, ii, &start, | |||
986 | &step, &end) == 0) | |||
987 | { | |||
988 | bool success; | |||
989 | ps_insn_ptr next_ps_i; | |||
990 | int branch_cycle = SCHED_TIME (g->closing_branch->cuid)((&node_sched_param_vec[g->closing_branch->cuid])-> time); | |||
991 | int row = SMODULO (branch_cycle, ps->ii)((branch_cycle) % (ps->ii) < 0 ? ((branch_cycle) % (ps-> ii) + (ps->ii)) : (branch_cycle) % (ps->ii)); | |||
992 | int num_splits = 0; | |||
993 | sbitmap tmp_precede, tmp_follow; | |||
994 | int min_cycle, c; | |||
995 | ||||
996 | if (dump_file) | |||
997 | fprintf (dump_file, "\nTrying to schedule node %d " | |||
998 | "INSN = %d in (%d .. %d) step %d\n", | |||
999 | g->closing_branch->cuid, | |||
1000 | (INSN_UID (g->closing_branch->insn)), start, end, step); | |||
1001 | ||||
1002 | gcc_assert ((step > 0 && start < end) || (step < 0 && start > end))((void)(!((step > 0 && start < end) || (step < 0 && start > end)) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/modulo-sched.cc" , 1002, __FUNCTION__), 0 : 0)); | |||
1003 | if (step == 1) | |||
1004 | { | |||
1005 | c = start + ii - SMODULO (start, ii)((start) % (ii) < 0 ? ((start) % (ii) + (ii)) : (start) % ( ii)) - 1; | |||
1006 | gcc_assert (c >= start)((void)(!(c >= start) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/modulo-sched.cc" , 1006, __FUNCTION__), 0 : 0)); | |||
1007 | if (c >= end) | |||
1008 | { | |||
1009 | if (dump_file) | |||
1010 | fprintf (dump_file, | |||
1011 | "SMS failed to schedule branch at cycle: %d\n", c); | |||
1012 | return false; | |||
1013 | } | |||
1014 | } | |||
1015 | else | |||
1016 | { | |||
1017 | c = start - SMODULO (start, ii)((start) % (ii) < 0 ? ((start) % (ii) + (ii)) : (start) % ( ii)) - 1; | |||
1018 | gcc_assert (c <= start)((void)(!(c <= start) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/modulo-sched.cc" , 1018, __FUNCTION__), 0 : 0)); | |||
1019 | ||||
1020 | if (c <= end) | |||
1021 | { | |||
1022 | if (dump_file) | |||
1023 | fprintf (dump_file, | |||
1024 | "SMS failed to schedule branch at cycle: %d\n", c); | |||
1025 | return false; | |||
1026 | } | |||
1027 | } | |||
1028 | ||||
1029 | auto_sbitmap must_precede (g->num_nodes); | |||
1030 | auto_sbitmap must_follow (g->num_nodes); | |||
1031 | ||||
1032 | /* Try to schedule the branch in its new cycle. */ | |||
1033 | calculate_must_precede_follow (g->closing_branch, start, end, | |||
1034 | step, ii, sched_nodes, | |||
1035 | must_precede, must_follow); | |||
1036 | ||||
1037 | set_must_precede_follow (&tmp_follow, must_follow, &tmp_precede, | |||
1038 | must_precede, c, start, end, step); | |||
1039 | ||||
1040 | /* Find the element in the partial schedule related to the closing | |||
1041 | branch so we can remove it from its current cycle. */ | |||
1042 | for (next_ps_i = ps->rows[row]; | |||
1043 | next_ps_i; next_ps_i = next_ps_i->next_in_row) | |||
1044 | if (next_ps_i->id == g->closing_branch->cuid) | |||
1045 | break; | |||
1046 | ||||
1047 | min_cycle = PS_MIN_CYCLE (ps)(((partial_schedule_ptr)(ps))->min_cycle) - SMODULO (PS_MIN_CYCLE (ps), ps->ii)(((((partial_schedule_ptr)(ps))->min_cycle)) % (ps->ii) < 0 ? (((((partial_schedule_ptr)(ps))->min_cycle)) % ( ps->ii) + (ps->ii)) : ((((partial_schedule_ptr)(ps))-> min_cycle)) % (ps->ii)); | |||
1048 | remove_node_from_ps (ps, next_ps_i); | |||
1049 | success = | |||
1050 | try_scheduling_node_in_cycle (ps, g->closing_branch->cuid, c, | |||
1051 | sched_nodes, &num_splits, | |||
1052 | tmp_precede, tmp_follow); | |||
1053 | gcc_assert (num_splits == 0)((void)(!(num_splits == 0) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/modulo-sched.cc" , 1053, __FUNCTION__), 0 : 0)); | |||
1054 | if (!success) | |||
1055 | { | |||
1056 | if (dump_file) | |||
1057 | fprintf (dump_file, | |||
1058 | "SMS failed to schedule branch at cycle: %d, " | |||
1059 | "bringing it back to cycle %d\n", c, branch_cycle); | |||
1060 | ||||
1061 | /* The branch failed to be placed in row ii - 1. | |||
1062 | Put it back in its original place in the partial | |||
1063 | schedule. */ | |||
1064 | set_must_precede_follow (&tmp_follow, must_follow, &tmp_precede, | |||
1065 | must_precede, branch_cycle, start, end, | |||
1066 | step); | |||
1067 | success = | |||
1068 | try_scheduling_node_in_cycle (ps, g->closing_branch->cuid, | |||
1069 | branch_cycle, sched_nodes, | |||
1070 | &num_splits, tmp_precede, | |||
1071 | tmp_follow); | |||
1072 | gcc_assert (success && (num_splits == 0))((void)(!(success && (num_splits == 0)) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/modulo-sched.cc" , 1072, __FUNCTION__), 0 : 0)); | |||
1073 | ok = false; | |||
1074 | } | |||
1075 | else | |||
1076 | { | |||
1077 | /* The branch is placed in row ii - 1. */ | |||
1078 | if (dump_file) | |||
1079 | fprintf (dump_file, | |||
1080 | "SMS success in moving branch to cycle %d\n", c); | |||
1081 | ||||
1082 | update_node_sched_params (g->closing_branch->cuid, ii, c, | |||
1083 | PS_MIN_CYCLE (ps)(((partial_schedule_ptr)(ps))->min_cycle)); | |||
1084 | ok = true; | |||
1085 | } | |||
1086 | ||||
1087 | /* This might have been added to a new first stage. */ | |||
1088 | if (PS_MIN_CYCLE (ps)(((partial_schedule_ptr)(ps))->min_cycle) < min_cycle) | |||
1089 | reset_sched_times (ps, 0); | |||
1090 | } | |||
1091 | ||||
1092 | return ok; | |||
1093 | } | |||
1094 | ||||
1095 | static void | |||
1096 | duplicate_insns_of_cycles (partial_schedule_ptr ps, int from_stage, | |||
1097 | int to_stage, rtx count_reg, class loop *loop) | |||
1098 | { | |||
1099 | int row; | |||
1100 | ps_insn_ptr ps_ij; | |||
1101 | copy_bb_data id; | |||
1102 | ||||
1103 | for (row = 0; row < ps->ii; row++) | |||
1104 | for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row) | |||
1105 | { | |||
1106 | int u = ps_ij->id; | |||
1107 | int first_u, last_u; | |||
1108 | rtx_insn *u_insn; | |||
1109 | ||||
1110 | /* Do not duplicate any insn which refers to count_reg as it | |||
1111 | belongs to the control part. | |||
1112 | The closing branch is scheduled as well and thus should | |||
1113 | be ignored. | |||
1114 | TODO: This should be done by analyzing the control part of | |||
1115 | the loop. */ | |||
1116 | u_insn = ps_rtl_insn (ps, u); | |||
1117 | if (reg_mentioned_p (count_reg, u_insn) | |||
1118 | || JUMP_P (u_insn)(((enum rtx_code) (u_insn)->code) == JUMP_INSN)) | |||
1119 | continue; | |||
1120 | ||||
1121 | first_u = SCHED_STAGE (u)((&node_sched_param_vec[u])->stage); | |||
1122 | last_u = first_u + ps_num_consecutive_stages (ps, u) - 1; | |||
1123 | if (from_stage <= last_u && to_stage >= first_u) | |||
1124 | { | |||
1125 | if (u < ps->g->num_nodes) | |||
1126 | duplicate_insn_chain (ps_first_note (ps, u), u_insn, | |||
1127 | loop, &id); | |||
1128 | else | |||
1129 | emit_insn (copy_rtx (PATTERN (u_insn))); | |||
1130 | } | |||
1131 | } | |||
1132 | } | |||
1133 | ||||
1134 | ||||
1135 | /* Generate the instructions (including reg_moves) for prolog & epilog. */ | |||
1136 | static void | |||
1137 | generate_prolog_epilog (partial_schedule_ptr ps, class loop *loop, | |||
1138 | rtx count_reg, bool adjust_init) | |||
1139 | { | |||
1140 | int i; | |||
1141 | int last_stage = PS_STAGE_COUNT (ps)(((partial_schedule_ptr)(ps))->stage_count) - 1; | |||
1142 | edge e; | |||
1143 | ||||
1144 | /* Generate the prolog, inserting its insns on the loop-entry edge. */ | |||
1145 | start_sequence (); | |||
1146 | ||||
1147 | if (adjust_init) | |||
1148 | { | |||
1149 | /* Generate instructions at the beginning of the prolog to | |||
1150 | adjust the loop count by STAGE_COUNT. If the loop count is constant | |||
1151 | and not used anywhere in the prologue, this constant is adjusted by | |||
1152 | STAGE_COUNT outside of the generate_prolog_epilog function. */ | |||
1153 | rtx sub_reg = NULL_RTX(rtx) 0; | |||
1154 | ||||
1155 | sub_reg = expand_simple_binop (GET_MODE (count_reg)((machine_mode) (count_reg)->mode), MINUS, count_reg, | |||
1156 | gen_int_mode (last_stage, | |||
1157 | GET_MODE (count_reg)((machine_mode) (count_reg)->mode)), | |||
1158 | count_reg, 1, OPTAB_DIRECT); | |||
1159 | gcc_assert (REG_P (sub_reg))((void)(!((((enum rtx_code) (sub_reg)->code) == REG)) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/modulo-sched.cc" , 1159, __FUNCTION__), 0 : 0)); | |||
1160 | if (REGNO (sub_reg)(rhs_regno(sub_reg)) != REGNO (count_reg)(rhs_regno(count_reg))) | |||
1161 | emit_move_insn (count_reg, sub_reg); | |||
1162 | } | |||
1163 | ||||
1164 | for (i = 0; i < last_stage; i++) | |||
1165 | duplicate_insns_of_cycles (ps, 0, i, count_reg, loop); | |||
1166 | ||||
1167 | /* Put the prolog on the entry edge. */ | |||
1168 | e = loop_preheader_edge (loop); | |||
1169 | split_edge_and_insert (e, get_insns ()); | |||
1170 | if (!flag_resched_modulo_schedglobal_options.x_flag_resched_modulo_sched) | |||
1171 | e->dest->flags |= BB_DISABLE_SCHEDULE; | |||
1172 | ||||
1173 | end_sequence (); | |||
1174 | ||||
1175 | /* Generate the epilog, inserting its insns on the loop-exit edge. */ | |||
1176 | start_sequence (); | |||
1177 | ||||
1178 | for (i = 0; i < last_stage; i++) | |||
1179 | duplicate_insns_of_cycles (ps, i + 1, last_stage, count_reg, loop); | |||
1180 | ||||
1181 | /* Put the epilogue on the exit edge. */ | |||
1182 | gcc_assert (single_exit (loop))((void)(!(single_exit (loop)) ? fancy_abort ("/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/modulo-sched.cc" , 1182, __FUNCTION__), 0 : 0)); | |||
1183 | e = single_exit (loop); | |||
1184 | split_edge_and_insert (e, get_insns ()); | |||
1185 | if (!flag_resched_modulo_schedglobal_options.x_flag_resched_modulo_sched) | |||
1186 | e->dest->flags |= BB_DISABLE_SCHEDULE; | |||
1187 | ||||
1188 | end_sequence (); | |||
1189 | } | |||
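The count adjustment performed above can be summarized by this standalone sketch (illustrative only):

/* With stage count SC, the prologue and epilogue together peel SC - 1
   iterations, so the kernel's trip count is the original count minus
   SC - 1 (this is what the subtraction of last_stage above achieves).  */
static long
sketch_kernel_trip_count (long orig_count, int stage_count)
{
  return orig_count - (stage_count - 1);
  /* e.g. 100 original iterations with 3 stages -> kernel runs 98 times.  */
}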
1190 | ||||
1191 | /* Mark LOOP as software pipelined so the later | |||
1192 | scheduling passes don't touch it. */ | |||
1193 | static void | |||
1194 | mark_loop_unsched (class loop *loop) | |||
1195 | { | |||
1196 | unsigned i; | |||
1197 | basic_block *bbs = get_loop_body (loop); | |||
1198 | ||||
1199 | for (i = 0; i < loop->num_nodes; i++) | |||
1200 | bbs[i]->flags |= BB_DISABLE_SCHEDULE; | |||
1201 | ||||
1202 | free (bbs); | |||
1203 | } | |||
1204 | ||||
1205 | /* Return true if all the BBs of the loop are empty except the | |||
1206 | loop header. */ | |||
1207 | static bool | |||
1208 | loop_single_full_bb_p (class loop *loop) | |||
1209 | { | |||
1210 | unsigned i; | |||
1211 | basic_block *bbs = get_loop_body (loop); | |||
1212 | ||||
1213 | for (i = 0; i < loop->num_nodes ; i++) | |||
1214 | { | |||
1215 | rtx_insn *head, *tail; | |||
1216 | bool empty_bb = true; | |||
1217 | ||||
1218 | if (bbs[i] == loop->header) | |||
1219 | continue; | |||
1220 | ||||
1221 | /* Make sure that basic blocks other than the header | |||
1222 | have only notes, labels or jumps. */ | |||
1223 | get_ebb_head_tail (bbs[i], bbs[i], &head, &tail); | |||
1224 | for (; head != NEXT_INSN (tail); head = NEXT_INSN (head)) | |||
1225 | { | |||
1226 | if (NOTE_P (head) || LABEL_P (head) | |||
1227 | || (INSN_P (head) && (DEBUG_INSN_P (head) || JUMP_P (head)))) | |||
1228 | continue; | |||
1229 | empty_bb = false; | |||
1230 | break; | |||
1231 | } | |||
1232 | ||||
1233 | if (! empty_bb) | |||
1234 | { | |||
1235 | free (bbs); | |||
1236 | return false; | |||
1237 | } | |||
1238 | } | |||
1239 | free (bbs); | |||
1240 | return true; | |||
1241 | } | |||
1242 | ||||
1243 | /* Dump file:line from INSN's location info to dump_file. */ | |||
1244 | ||||
1245 | static void | |||
1246 | dump_insn_location (rtx_insn *insn) | |||
1247 | { | |||
1248 | if (dump_file && INSN_HAS_LOCATION (insn)) | |||
1249 | { | |||
1250 | expanded_location xloc = insn_location (insn); | |||
1251 | fprintf (dump_file, " %s:%i", xloc.file, xloc.line); | |||
1252 | } | |||
1253 | } | |||
1254 | ||||
1255 | /* A simple loop from SMS point of view; it is a loop that is composed of | |||
1256 | either a single basic block or two BBs - a header and a latch. */ | |||
1257 | #define SIMPLE_SMS_LOOP_P(loop) ((loop->num_nodes < 3 ) \ | |||
1258 | && (EDGE_COUNT (loop->latch->preds) == 1) \ | |||
1259 | && (EDGE_COUNT (loop->latch->succs) == 1)) | |||
1260 | ||||
1261 | /* Return true if the loop is in its canonical form and false if not, i.e. | |||
1262 | it is SIMPLE_SMS_LOOP_P and has a single preheader block and a single exit. */ | |||
1263 | static bool | |||
1264 | loop_canon_p (class loop *loop) | |||
1265 | { | |||
1266 | ||||
1267 | if (loop->inner || !loop_outer (loop)) | |||
1268 | { | |||
1269 | if (dump_file) | |||
1270 | fprintf (dump_file, "SMS loop inner or !loop_outer\n"); | |||
1271 | return false; | |||
1272 | } | |||
1273 | ||||
1274 | if (!single_exit (loop)) | |||
1275 | { | |||
1276 | if (dump_file) | |||
1277 | { | |||
1278 | rtx_insn *insn = BB_END (loop->header); | |||
1279 | ||||
1280 | fprintf (dump_file, "SMS loop many exits"); | |||
1281 | dump_insn_location (insn); | |||
1282 | fprintf (dump_file, "\n"); | |||
1283 | } | |||
1284 | return false; | |||
1285 | } | |||
1286 | ||||
1287 | if (! SIMPLE_SMS_LOOP_P (loop) && ! loop_single_full_bb_p (loop)) | |||
1288 | { | |||
1289 | if (dump_file) | |||
1290 | { | |||
1291 | rtx_insn *insn = BB_END (loop->header); | |||
1292 | ||||
1293 | fprintf (dump_file, "SMS loop many BBs."); | |||
1294 | dump_insn_location (insn); | |||
1295 | fprintf (dump_file, "\n"); | |||
1296 | } | |||
1297 | return false; | |||
1298 | } | |||
1299 | ||||
1300 | return true; | |||
1301 | } | |||
1302 | ||||
1303 | /* If the loop has more than one entry, make it have just one by | |||
1304 | splitting the first entry edge and redirecting the other entries | |||
1305 | to the new BB. */ | |||
1306 | static void | |||
1307 | canon_loop (class loop *loop) | |||
1308 | { | |||
1309 | edge e; | |||
1310 | edge_iterator i; | |||
1311 | ||||
1312 | /* Avoid annoying special cases of edges going to exit | |||
1313 | block. */ | |||
1314 | FOR_EACH_EDGE (e, i, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) | |||
1315 | if ((e->flags & EDGE_FALLTHRU) && (EDGE_COUNT (e->src->succs) > 1)) | |||
1316 | split_edge (e); | |||
1317 | ||||
1318 | if (loop->latch == loop->header | |||
1319 | || EDGE_COUNT (loop->latch->succs) > 1) | |||
1320 | { | |||
1321 | FOR_EACH_EDGE (e, i, loop->header->preds) | |||
1322 | if (e->src == loop->latch) | |||
1323 | break; | |||
1324 | split_edge (e); | |||
1325 | } | |||
1326 | } | |||
1327 | ||||
1328 | /* Set up the scheduler infos. */ | |||
1329 | static void | |||
1330 | setup_sched_infos (void) | |||
1331 | { | |||
1332 | memcpy (&sms_common_sched_info, &haifa_common_sched_info, | |||
1333 | sizeof (sms_common_sched_info)); | |||
1334 | sms_common_sched_info.sched_pass_id = SCHED_SMS_PASS; | |||
1335 | common_sched_info = &sms_common_sched_info; | |||
1336 | ||||
1337 | sched_deps_info = &sms_sched_deps_info; | |||
1338 | current_sched_info = &sms_sched_info; | |||
1339 | } | |||
1340 | ||||
1341 | /* Probability in % that the sms-ed loop rolls enough so that the | |||
1342 | optimized version may be entered. Just a guess. */ | |||
1343 | #define PROB_SMS_ENOUGH_ITERATIONS 80 | |||
1344 | ||||
1345 | /* Main entry point, perform SMS scheduling on the loops of the function | |||
1346 | that consist of single basic blocks. */ | |||
1347 | static void | |||
1348 | sms_schedule (void) | |||
1349 | { | |||
1350 | rtx_insn *insn; | |||
1351 | ddg_ptr *g_arr, g; | |||
1352 | int * node_order; | |||
1353 | int maxii, max_asap; | |||
1354 | partial_schedule_ptr ps; | |||
1355 | basic_block bb = NULL; | |||
1356 | basic_block condition_bb = NULL; | |||
1357 | edge latch_edge; | |||
1358 | HOST_WIDE_INT trip_count, max_trip_count; | |||
1359 | HARD_REG_SET prohibited_regs; | |||
1360 | ||||
1361 | loop_optimizer_init (LOOPS_HAVE_PREHEADERS | |||
1362 | | LOOPS_HAVE_RECORDED_EXITS); | |||
1363 | if (number_of_loops (cfun) <= 1) | |||
1364 | { | |||
1365 | loop_optimizer_finalize (); | |||
1366 | return; /* There are no loops to schedule. */ | |||
1367 | } | |||
1368 | ||||
1369 | /* Initialize issue_rate. */ | |||
1370 | if (targetm.sched.issue_rate) | |||
1371 | { | |||
1372 | int temp = reload_completed; | |||
1373 | ||||
1374 | reload_completed = 1; | |||
1375 | issue_rate = targetm.sched.issue_rate (); | |||
1376 | reload_completed = temp; | |||
1377 | } | |||
1378 | else | |||
1379 | issue_rate = 1; | |||
1380 | ||||
1381 | /* Initialize the scheduler. */ | |||
1382 | setup_sched_infos (); | |||
1383 | haifa_sched_init (); | |||
1384 | ||||
1385 | /* Allocate memory to hold the DDG array one entry for each loop. | |||
1386 | We use loop->num as index into this array. */ | |||
1387 | g_arr = XCNEWVEC (ddg_ptr, number_of_loops (cfun)); | |||
1388 | ||||
1389 | REG_SET_TO_HARD_REG_SET (prohibited_regs, &df->regular_block_artificial_uses); | |||
1390 | ||||
1391 | if (dump_file) | |||
1392 | { | |||
1393 | fprintf (dump_file, "\n\nSMS analysis phase\n"); | |||
1394 | fprintf (dump_file, "===================\n\n"); | |||
1395 | } | |||
1396 | ||||
1397 | /* Build DDGs for all the relevant loops and hold them in G_ARR | |||
1398 | indexed by the loop index. */ | |||
1399 | for (auto loop : loops_list (cfun, 0)) | |||
1400 | { | |||
1401 | rtx_insn *head, *tail; | |||
1402 | rtx count_reg; | |||
1403 | ||||
1404 | /* For debugging. */ | |||
1405 | if (dbg_cnt (sms_sched_loop) == false) | |||
1406 | { | |||
1407 | if (dump_file) | |||
1408 | fprintf (dump_file, "SMS reached max limit... \n"); | |||
1409 | ||||
1410 | break; | |||
1411 | } | |||
1412 | ||||
1413 | if (dump_file) | |||
1414 | { | |||
1415 | rtx_insn *insn = BB_END (loop->header); | |||
1416 | ||||
1417 | fprintf (dump_file, "SMS loop num: %d", loop->num); | |||
1418 | dump_insn_location (insn); | |||
1419 | fprintf (dump_file, "\n"); | |||
1420 | } | |||
1421 | ||||
1422 | if (! loop_canon_p (loop)) | |||
1423 | continue; | |||
1424 | ||||
1425 | if (! loop_single_full_bb_p (loop)) | |||
1426 | { | |||
1427 | if (dump_file) | |||
1428 | fprintf (dump_file, "SMS not loop_single_full_bb_p\n"); | |||
1429 | continue; | |||
1430 | } | |||
1431 | ||||
1432 | bb = loop->header; | |||
1433 | ||||
1434 | get_ebb_head_tail (bb, bb, &head, &tail); | |||
1435 | latch_edge = loop_latch_edge (loop); | |||
1436 | gcc_assert (single_exit (loop)); | |||
1437 | trip_count = get_estimated_loop_iterations_int (loop); | |||
1438 | max_trip_count = get_max_loop_iterations_int (loop); | |||
1439 | ||||
1440 | /* Perform SMS only on loops whose average count is above the threshold. */ | |||
1441 | ||||
1442 | if (latch_edge->count () > profile_count::zero () | |||
1443 | && (latch_edge->count () | |||
1444 | < (single_exit (loop)->count () | |||
1445 | * param_sms_loop_average_count_threshold))) | |||
1446 | { | |||
1447 | if (dump_file) | |||
1448 | { | |||
1449 | dump_insn_location (tail); | |||
1450 | fprintf (dump_file, "\nSMS single-bb-loop\n"); | |||
1451 | if (profile_info && flag_branch_probabilities) | |||
1452 | { | |||
1453 | fprintf (dump_file, "SMS loop-count "); | |||
1454 | fprintf (dump_file, "%" PRId64, | |||
1455 | (int64_t) bb->count.to_gcov_type ()); | |||
1456 | fprintf (dump_file, "\n"); | |||
1457 | fprintf (dump_file, "SMS trip-count "); | |||
1458 | fprintf (dump_file, "%" PRId64 "max %" PRId64, | |||
1459 | (int64_t) trip_count, (int64_t) max_trip_count); | |||
1460 | fprintf (dump_file, "\n"); | |||
1461 | } | |||
1462 | } | |||
1463 | continue; | |||
1464 | } | |||
1465 | ||||
1466 | /* Make sure this is a doloop. */ | |||
1467 | if (!(count_reg = doloop_register_get (head, tail))) | |||
1468 | { | |||
1469 | if (dump_file) | |||
1470 | fprintf (dump_file, "SMS doloop_register_get failed\n"); | |||
1471 | continue; | |||
1472 | } | |||
1473 | ||||
1474 | /* Don't handle BBs with calls or barriers | |||
1475 | or !single_set with the exception of do-loop control part insns. | |||
1476 | ??? Should handle insns defining subregs. */ | |||
1477 | for (insn = head; insn != NEXT_INSN (tail); insn = NEXT_INSN (insn)) | |||
1478 | { | |||
1479 | if (INSN_P (insn)) | |||
1480 | { | |||
1481 | HARD_REG_SET regs; | |||
1482 | CLEAR_HARD_REG_SET (regs); | |||
1483 | note_stores (insn, record_hard_reg_sets, ®s); | |||
1484 | if (hard_reg_set_intersect_p (regs, prohibited_regs)) | |||
1485 | break; | |||
1486 | } | |||
1487 | ||||
1488 | if (CALL_P (insn) | |||
1489 | || BARRIER_P (insn) | |||
1490 | || (INSN_P (insn) && single_set (insn) | |||
1491 | && GET_CODE (SET_DEST (single_set (insn))) == SUBREG) | |||
1492 | /* Not a single set. */ | |||
1493 | || (NONDEBUG_INSN_P (insn) && !JUMP_P (insn) | |||
1494 | && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE | |||
1495 | /* But non-single-set allowed in one special case. */ | |||
1496 | && (insn != prev_nondebug_insn (tail) | |||
1497 | || !reg_mentioned_p (count_reg, insn)))) | |||
1498 | break; | |||
1499 | } | |||
1500 | ||||
1501 | if (insn != NEXT_INSN (tail)) | |||
1502 | { | |||
1503 | if (dump_file) | |||
1504 | { | |||
1505 | if (CALL_P (insn)) | |||
1506 | fprintf (dump_file, "SMS loop-with-call\n"); | |||
1507 | else if (BARRIER_P (insn)) | |||
1508 | fprintf (dump_file, "SMS loop-with-barrier\n"); | |||
1509 | else if (INSN_P (insn) && single_set (insn) | |||
1510 | && GET_CODE (SET_DEST (single_set (insn))) == SUBREG) | |||
1511 | fprintf (dump_file, "SMS loop with subreg in lhs\n"); | |||
1512 | else | |||
1513 | fprintf (dump_file, | |||
1514 | "SMS loop-with-not-single-set-or-prohibited-reg\n"); | |||
1515 | ||||
1516 | print_rtl_single (dump_file, insn); | |||
1517 | } | |||
1518 | ||||
1519 | continue; | |||
1520 | } | |||
1521 | ||||
1522 | /* Always schedule the closing branch with the rest of the | |||
1523 | instructions. The branch is rotated to be in row ii-1 at the | |||
1524 | end of the scheduling procedure to make sure it's the last | |||
1525 | instruction in the iteration. */ | |||
1526 | if (! (g = create_ddg (bb, 1))) | |||
1527 | { | |||
1528 | if (dump_file) | |||
1529 | fprintf (dump_file, "SMS create_ddg failed\n"); | |||
1530 | continue; | |||
1531 | } | |||
1532 | ||||
1533 | g_arr[loop->num] = g; | |||
1534 | if (dump_file) | |||
1535 | fprintf (dump_file, "...OK\n"); | |||
1536 | ||||
1537 | } | |||
1538 | if (dump_file) | |||
1539 | { | |||
1540 | fprintf (dump_file, "\nSMS transformation phase\n"); | |||
1541 | fprintf (dump_file, "=========================\n\n"); | |||
1542 | } | |||
1543 | ||||
1544 | /* We don't want to perform SMS on new loops - created by versioning. */ | |||
1545 | for (auto loop : loops_list (cfun, 0)) | |||
1546 | { | |||
1547 | rtx_insn *head, *tail; | |||
1548 | rtx count_reg; | |||
1549 | rtx_insn *count_init; | |||
1550 | int mii, rec_mii, stage_count, min_cycle; | |||
1551 | int64_t loop_count = 0; | |||
1552 | bool opt_sc_p, adjust_inplace = false; | |||
1553 | basic_block pre_header; | |||
1554 | ||||
1555 | if (! (g = g_arr[loop->num])) | |||
1556 | continue; | |||
1557 | ||||
1558 | if (dump_file) | |||
1559 | { | |||
1560 | rtx_insn *insn = BB_END (loop->header); | |||
1561 | ||||
1562 | fprintf (dump_file, "SMS loop num: %d", loop->num); | |||
1563 | dump_insn_location (insn); | |||
1564 | fprintf (dump_file, "\n"); | |||
1565 | ||||
1566 | print_ddg (dump_file, g); | |||
1567 | } | |||
1568 | ||||
1569 | get_ebb_head_tail (loop->header, loop->header, &head, &tail); | |||
1570 | ||||
1571 | latch_edge = loop_latch_edge (loop); | |||
1572 | gcc_assert (single_exit (loop)); | |||
1573 | trip_count = get_estimated_loop_iterations_int (loop); | |||
1574 | max_trip_count = get_max_loop_iterations_int (loop); | |||
1575 | ||||
1576 | if (dump_file) | |||
1577 | { | |||
1578 | dump_insn_location (tail); | |||
1579 | fprintf (dump_file, "\nSMS single-bb-loop\n"); | |||
1580 | if (profile_info && flag_branch_probabilities) | |||
1581 | { | |||
1582 | fprintf (dump_file, "SMS loop-count "); | |||
1583 | fprintf (dump_file, "%" PRId64, | |||
1584 | (int64_t) bb->count.to_gcov_type ()); | |||
1585 | fprintf (dump_file, "\n"); | |||
1586 | } | |||
1587 | fprintf (dump_file, "SMS doloop\n"); | |||
1588 | fprintf (dump_file, "SMS built-ddg %d\n", g->num_nodes); | |||
1589 | fprintf (dump_file, "SMS num-loads %d\n", g->num_loads); | |||
1590 | fprintf (dump_file, "SMS num-stores %d\n", g->num_stores); | |||
1591 | } | |||
1592 | ||||
1593 | ||||
1594 | count_reg = doloop_register_get (head, tail); | |||
1595 | gcc_assert (count_reg); | |||
1596 | ||||
1597 | pre_header = loop_preheader_edge (loop)->src; | |||
1598 | count_init = const_iteration_count (count_reg, pre_header, &loop_count, | |||
1599 | &adjust_inplace); | |||
1600 | ||||
1601 | if (dump_file && count_init) | |||
1602 | { | |||
1603 | fprintf (dump_file, "SMS const-doloop "); | |||
1604 | fprintf (dump_file, "%" PRId64, | |||
1605 | loop_count); | |||
1606 | fprintf (dump_file, "\n"); | |||
1607 | } | |||
1608 | ||||
1609 | node_order = XNEWVEC (int, g->num_nodes); | |||
1610 | ||||
1611 | mii = 1; /* Need to pass some estimate of mii. */ | |||
1612 | rec_mii = sms_order_nodes (g, mii, node_order, &max_asap); | |||
1613 | mii = MAX (res_MII (g), rec_mii); | |||
1614 | mii = MAX (mii, 1); | |||
1615 | maxii = MAX (max_asap, param_sms_max_ii_factor * mii); | |||
1616 | ||||
1617 | if (dump_file) | |||
1618 | fprintf (dump_file, "SMS iis %d %d %d (rec_mii, mii, maxii)\n", | |||
1619 | rec_mii, mii, maxii); | |||
1620 | ||||
1621 | for (;;) | |||
1622 | { | |||
1623 | set_node_sched_params (g); | |||
1624 | ||||
1625 | stage_count = 0; | |||
1626 | opt_sc_p = false; | |||
1627 | ps = sms_schedule_by_order (g, mii, maxii, node_order); | |||
1628 | ||||
1629 | if (ps) | |||
1630 | { | |||
1631 | /* Try to achieve optimized SC by normalizing the partial | |||
1632 | schedule (having the cycles start from cycle zero). | |||
1633 | The branch location must be placed in row ii-1 in the | |||
1634 | final scheduling. If failed, shift all instructions to | |||
1635 | position the branch in row ii-1. */ | |||
1636 | opt_sc_p = optimize_sc (ps, g); | |||
1637 | if (opt_sc_p) | |||
1638 | stage_count = calculate_stage_count (ps, 0); | |||
1639 | else | |||
1640 | { | |||
1641 | /* Bring the branch to cycle ii-1. */ | |||
1642 | int amount = (SCHED_TIME (g->closing_branch->cuid) | |||
1643 | - (ps->ii - 1)); | |||
1644 | ||||
1645 | if (dump_file) | |||
1646 | fprintf (dump_file, "SMS schedule branch at cycle ii-1\n"); | |||
1647 | ||||
1648 | stage_count = calculate_stage_count (ps, amount); | |||
1649 | } | |||
1650 | ||||
1651 | gcc_assert (stage_count >= 1); | |||
1652 | } | |||
1653 | ||||
1654 | /* The default value of param_sms_min_sc is 2 as stage count of | |||
1655 | 1 means that there is no interleaving between iterations thus | |||
1656 | we let the scheduling passes do the job in this case. */ | |||
1657 | if (stage_count < param_sms_min_sc | |||
1658 | || (count_init && (loop_count <= stage_count)) | |||
1659 | || (max_trip_count >= 0 && max_trip_count <= stage_count) | |||
1660 | || (trip_count >= 0 && trip_count <= stage_count)) | |||
1661 | { | |||
1662 | if (dump_file) | |||
1663 | { | |||
1664 | fprintf (dump_file, "SMS failed... \n"); | |||
1665 | fprintf (dump_file, "SMS sched-failed (stage-count=%d," | |||
1666 | " loop-count=", stage_count); | |||
1667 | fprintf (dump_file, "%" PRId64, loop_count); | |||
1668 | fprintf (dump_file, ", trip-count="); | |||
1669 | fprintf (dump_file, "%" PRId64 "max %" PRId64, | |||
1670 | (int64_t) trip_count, (int64_t) max_trip_count); | |||
1671 | fprintf (dump_file, ")\n"); | |||
1672 | } | |||
1673 | break; | |||
1674 | } | |||
1675 | ||||
1676 | if (!opt_sc_p) | |||
1677 | { | |||
1678 | /* Rotate the partial schedule to have the branch in row ii-1. */ | |||
1679 | int amount = SCHED_TIME (g->closing_branch->cuid) - (ps->ii - 1); | |||
1680 | ||||
1681 | reset_sched_times (ps, amount); | |||
1682 | rotate_partial_schedule (ps, amount); | |||
1683 | } | |||
1684 | ||||
1685 | set_columns_for_ps (ps); | |||
1686 | ||||
1687 | min_cycle = PS_MIN_CYCLE (ps) - SMODULO (PS_MIN_CYCLE (ps), ps->ii); | |||
1688 | if (!schedule_reg_moves (ps)) | |||
1689 | { | |||
1690 | mii = ps->ii + 1; | |||
1691 | free_partial_schedule (ps); | |||
1692 | continue; | |||
1693 | } | |||
1694 | ||||
1695 | /* Moves that handle incoming values might have been added | |||
1696 | to a new first stage. Bump the stage count if so. | |||
1697 | ||||
1698 | ??? Perhaps we could consider rotating the schedule here | |||
1699 | instead? */ | |||
1700 | if (PS_MIN_CYCLE (ps) < min_cycle) | |||
1701 | { | |||
1702 | reset_sched_times (ps, 0); | |||
1703 | stage_count++; | |||
1704 | } | |||
1705 | ||||
1706 | /* The stage count should now be correct without rotation. */ | |||
1707 | gcc_checking_assert (stage_count == calculate_stage_count (ps, 0)); | |||
1708 | PS_STAGE_COUNT (ps) = stage_count; | |||
1709 | ||||
1710 | canon_loop (loop); | |||
1711 | ||||
1712 | if (dump_file) | |||
1713 | { | |||
1714 | dump_insn_location (tail); | |||
1715 | fprintf (dump_file, " SMS succeeded %d %d (with ii, sc)\n", | |||
1716 | ps->ii, stage_count); | |||
1717 | print_partial_schedule (ps, dump_file); | |||
1718 | } | |||
1719 | ||||
1720 | if (count_init) | |||
1721 | { | |||
1722 | if (adjust_inplace) | |||
1723 | { | |||
1724 | /* When possible, set new iteration count of loop kernel in | |||
1725 | place. Otherwise, generate_prolog_epilog creates an insn | |||
1726 | to adjust. */ | |||
1727 | SET_SRC (single_set (count_init)) = GEN_INT (loop_count | |||
1728 | - stage_count + 1); | |||
1729 | } | |||
1730 | } | |||
1731 | else | |||
1732 | { | |||
1733 | /* In case the BCT count is not known, do loop versioning. */ | |||
1734 | rtx comp_rtx = gen_rtx_GT (VOIDmode, count_reg, | |||
1735 | gen_int_mode (stage_count, | |||
1736 | GET_MODE (count_reg))); | |||
1737 | profile_probability prob = profile_probability::guessed_always () | |||
1738 | .apply_scale (PROB_SMS_ENOUGH_ITERATIONS, 100); | |||
1739 | ||||
1740 | loop_version (loop, comp_rtx, &condition_bb, | |||
1741 | prob, prob.invert (), | |||
1742 | prob, prob.invert (), true); | |||
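| /* For illustration (hypothetical value): with stage_count == 3 the | |||
| guard built above is "count_reg > 3", so the modulo-scheduled copy | |||
| is entered only when enough iterations remain to fill and drain the | |||
| pipeline; otherwise the unmodified loop version runs. */ | |||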
1743 | } | |||
1744 | ||||
1745 | /* Now apply the scheduled kernel to the RTL of the loop. */ | |||
1746 | permute_partial_schedule (ps, g->closing_branch->first_note); | |||
1747 | ||||
1748 | /* Mark this loop as software pipelined so the later | |||
1749 | scheduling passes don't touch it. */ | |||
1750 | if (! flag_resched_modulo_sched) | |||
1751 | mark_loop_unsched (loop); | |||
1752 | ||||
1753 | /* The life-info is not valid any more. */ | |||
1754 | df_set_bb_dirty (g->bb); | |||
1755 | ||||
1756 | apply_reg_moves (ps); | |||
1757 | if (dump_file) | |||
1758 | print_node_sched_params (dump_file, g->num_nodes, ps); | |||
1759 | /* Generate prolog and epilog. */ | |||
1760 | generate_prolog_epilog (ps, loop, count_reg, !adjust_inplace); | |||
1761 | break; | |||
1762 | } | |||
1763 | ||||
1764 | free_partial_schedule (ps); | |||
1765 | node_sched_param_vec.release (); | |||
1766 | free (node_order); | |||
1767 | free_ddg (g); | |||
1768 | } | |||
1769 | ||||
1770 | free (g_arr); | |||
1771 | ||||
1772 | /* Release scheduler data, needed until now because of DFA. */ | |||
1773 | haifa_sched_finish (); | |||
1774 | loop_optimizer_finalize (); | |||
1775 | } | |||
1776 | ||||
1777 | /* The SMS scheduling algorithm itself | |||
1778 | ----------------------------------- | |||
1779 | Input: 'O' an ordered list of insns of a loop. | |||
1780 | Output: A scheduling of the loop - kernel, prolog, and epilogue. | |||
1781 | ||||
1782 | 'Q' is the empty Set | |||
1783 | 'PS' is the partial schedule; it holds the currently scheduled nodes with | |||
1784 | their cycle/slot. | |||
1785 | 'PSP' previously scheduled predecessors. | |||
1786 | 'PSS' previously scheduled successors. | |||
1787 | 't(u)' the cycle where u is scheduled. | |||
1788 | 'l(u)' is the latency of u. | |||
1789 | 'd(v,u)' is the dependence distance from v to u. | |||
1790 | 'ASAP(u)' the earliest time at which u could be scheduled as computed in | |||
1791 | the node ordering phase. | |||
1792 | 'check_hardware_resources_conflicts(u, PS, c)' | |||
1793 | run a trace around cycle/slot through DFA model | |||
1794 | to check resource conflicts involving instruction u | |||
1795 | at cycle c given the partial schedule PS. | |||
1796 | 'add_to_partial_schedule_at_time(u, PS, c)' | |||
1797 | Add the node/instruction u to the partial schedule | |||
1798 | PS at time c. | |||
1799 | 'calculate_register_pressure(PS)' | |||
1800 | Given a schedule of instructions, calculate the register | |||
1801 | pressure it implies. One implementation could be the | |||
1802 | maximum number of overlapping live ranges. | |||
1803 | 'maxRP' The maximum allowed register pressure, it is usually derived from | |||
1804 | the number of registers available in the hardware. | |||
1805 | ||||
1806 | 1. II = MII. | |||
1807 | 2. PS = empty list | |||
1808 | 3. for each node u in O in pre-computed order | |||
1809 | 4. if (PSP(u) != Q && PSS(u) == Q) then | |||
1810 | 5. Early_start(u) = max ( t(v) + l(v) - d(v,u)*II ) over all v in PSP(u). | |||
1811 | 6. start = Early_start; end = Early_start + II - 1; step = 1 | |||
1812 | 11. else if (PSP(u) == Q && PSS(u) != Q) then | |||
1813 | 12. Late_start(u) = min ( t(v) - l(v) + d(v,u)*II ) over all v in PSS(u). | |||
1814 | 13. start = Late_start; end = Late_start - II + 1; step = -1 | |||
1815 | 14. else if (PSP(u) != Q && PSS(u) != Q) then | |||
1816 | 15. Early_start(u) = max ( t(v) + l(v) - d(v,u)*II ) over all v in PSP(u). | |||
1817 | 16. Late_start(u) = min ( t(v) - l(v) + d(v,u)*II ) over all v in PSS(u). | |||
1818 | 17. start = Early_start; | |||
1819 | 18. end = min(Early_start + II - 1 , Late_start); | |||
1820 | 19. step = 1 | |||
1821 | 20. else "if (PSP(u) == Q && PSS(u) == Q)" | |||
1822 | 21. start = ASAP(u); end = start + II - 1; step = 1 | |||
1823 | 22. endif | |||
1824 | ||||
1825 | 23. success = false | |||
1826 | 24. for (c = start ; c != end ; c += step) | |||
1827 | 25. if check_hardware_resources_conflicts(u, PS, c) then | |||
1828 | 26. add_to_partial_schedule_at_time(u, PS, c) | |||
1829 | 27. success = true | |||
1830 | 28. break | |||
1831 | 29. endif | |||
1832 | 30. endfor | |||
1833 | 31. if (success == false) then | |||
1834 | 32. II = II + 1 | |||
1835 | 33. if (II > maxII) then | |||
1836 | 34. finish - failed to schedule | |||
1837 | 35. endif | |||
1838 | 36. goto 2. | |||
1839 | 37. endif | |||
1840 | 38. endfor | |||
1841 | 39. if (calculate_register_pressure(PS) > maxRP) then | |||
1842 | 40. goto 32. | |||
1843 | 41. endif | |||
1844 | 42. compute epilogue & prologue | |||
1845 | 43. finish - succeeded to schedule | |||
1846 | ||||
1847 | ??? The algorithm restricts the scheduling window to II cycles. | |||
1848 | In rare cases, it may be better to allow windows of II+1 cycles. | |||
1849 | The window would then start and end on the same row, but with | |||
1850 | different "must precede" and "must follow" requirements. */ | |||
1851 | ||||
1852 | /* A threshold for the number of repeated unsuccessful attempts to insert | |||
1853 | an empty row, before we flush the partial schedule and start over. */ | |||
1854 | #define MAX_SPLIT_NUM 10 | |||
1855 | /* Given the partial schedule PS, this function calculates the window of | |||
1856 | cycles in which we can schedule the node with the given index I, returned | |||
1857 | through START_P, STEP_P and END_P. NOTE: Here we do the backtracking in | |||
1858 | SMS in some special cases; we have noticed several cases in which we fail | |||
1859 | to SMS the loop because the scheduling window of a node is empty due to | |||
1860 | tight data dependences. In such cases we want to unschedule some of the | |||
1861 | predecessors/successors until we get a non-empty scheduling window. It | |||
1862 | returns -1 if the scheduling window is empty and zero otherwise. */ | |||
1863 | ||||
1864 | static int | |||
1865 | get_sched_window (partial_schedule_ptr ps, ddg_node_ptr u_node, | |||
1866 | sbitmap sched_nodes, int ii, int *start_p, int *step_p, | |||
1867 | int *end_p) | |||
1868 | { | |||
1869 | int start, step, end; | |||
1870 | int early_start, late_start; | |||
1871 | ddg_edge_ptr e; | |||
1872 | auto_sbitmap psp (ps->g->num_nodes); | |||
1873 | auto_sbitmap pss (ps->g->num_nodes); | |||
1874 | sbitmap u_node_preds = NODE_PREDECESSORS (u_node); | |||
1875 | sbitmap u_node_succs = NODE_SUCCESSORS (u_node); | |||
1876 | int psp_not_empty; | |||
1877 | int pss_not_empty; | |||
1878 | int count_preds; | |||
1879 | int count_succs; | |||
1880 | ||||
1881 | /* 1. compute sched window for u (start, end, step). */ | |||
1882 | bitmap_clear (psp); | |||
1883 | bitmap_clear (pss); | |||
1884 | psp_not_empty = bitmap_and (psp, u_node_preds, sched_nodes); | |||
1885 | pss_not_empty = bitmap_and (pss, u_node_succs, sched_nodes); | |||
1886 | ||||
1887 | /* We first compute a forward range (start <= end), then decide whether | |||
1888 | to reverse it. */ | |||
1889 | early_start = INT_MIN; | |||
1890 | late_start = INT_MAX; | |||
1891 | start = INT_MIN; | |||
1892 | end = INT_MAX; | |||
1893 | step = 1; | |||
1894 | ||||
1895 | count_preds = 0; | |||
1896 | count_succs = 0; | |||
1897 | ||||
1898 | if (dump_file && (psp_not_empty || pss_not_empty)) | |||
1899 | { | |||
1900 | fprintf (dump_file, "\nAnalyzing dependencies for node %d (INSN %d)" | |||
1901 | "; ii = %d\n\n", u_node->cuid, INSN_UID (u_node->insn), ii); | |||
1902 | fprintf (dump_file, "%11s %11s %11s %11s %5s\n", | |||
1903 | "start", "early start", "late start", "end", "time"); | |||
1904 | fprintf (dump_file, "=========== =========== =========== ===========" | |||
1905 | " =====\n"); | |||
1906 | } | |||
1907 | /* Calculate early_start and limit end. Both bounds are inclusive. */ | |||
1908 | if (psp_not_empty) | |||
1909 | for (e = u_node->in; e != 0; e = e->next_in) | |||
1910 | { | |||
1911 | int v = e->src->cuid; | |||
1912 | ||||
1913 | if (bitmap_bit_p (sched_nodes, v)) | |||
1914 | { | |||
1915 | int p_st = SCHED_TIME (v); | |||
1916 | int earliest = p_st + e->latency - (e->distance * ii); | |||
1917 | int latest = (e->data_type == MEM_DEP ? p_st + ii - 1 : INT_MAX); | |||
1918 | ||||
1919 | if (dump_file) | |||
1920 | { | |||
1921 | fprintf (dump_file, "%11s %11d %11s %11d %5d", | |||
1922 | "", earliest, "", latest, p_st); | |||
1923 | print_ddg_edge (dump_file, e); | |||
1924 | fprintf (dump_file, "\n"); | |||
1925 | } | |||
1926 | ||||
1927 | early_start = MAX (early_start, earliest); | |||
1928 | end = MIN (end, latest); | |||
1929 | ||||
1930 | if (e->type == TRUE_DEP && e->data_type == REG_DEP) | |||
1931 | count_preds++; | |||
1932 | } | |||
1933 | } | |||
1934 | ||||
1935 | /* Calculate late_start and limit start. Both bounds are inclusive. */ | |||
1936 | if (pss_not_empty) | |||
1937 | for (e = u_node->out; e != 0; e = e->next_out) | |||
1938 | { | |||
1939 | int v = e->dest->cuid; | |||
1940 | ||||
1941 | if (bitmap_bit_p (sched_nodes, v)) | |||
1942 | { | |||
1943 | int s_st = SCHED_TIME (v); | |||
1944 | int earliest = (e->data_type == MEM_DEP ? s_st - ii + 1 : INT_MIN); | |||
1945 | int latest = s_st - e->latency + (e->distance * ii); | |||
1946 | ||||
1947 | if (dump_file) | |||
1948 | { | |||
1949 | fprintf (dump_file, "%11d %11s %11d %11s %5d", | |||
1950 | earliest, "", latest, "", s_st); | |||
1951 | print_ddg_edge (dump_file, e); | |||
1952 | fprintf (dump_file, "\n"); | |||
1953 | } | |||
1954 | ||||
1955 | start = MAX (start, earliest); | |||
1956 | late_start = MIN (late_start, latest); | |||
1957 | ||||
1958 | if (e->type == TRUE_DEP && e->data_type == REG_DEP) | |||
1959 | count_succs++; | |||
1960 | } | |||
1961 | } | |||
1962 | ||||
1963 | if (dump_file && (psp_not_empty || pss_not_empty)) | |||
1964 | { | |||
1965 | fprintf (dump_file, "----------- ----------- ----------- -----------" | |||
1966 | " -----\n"); | |||
1967 | fprintf (dump_file, "%11d %11d %11d %11d %5s %s\n", | |||
1968 | start, early_start, late_start, end, "", | |||
1969 | "(max, max, min, min)"); | |||
1970 | } | |||
1971 | ||||
1972 | /* Get a target scheduling window no bigger than ii. */ | |||
1973 | if (early_start == INT_MIN && late_start == INT_MAX) | |||
1974 | early_start = NODE_ASAP (u_node); | |||
1975 | else if (early_start == INT_MIN) | |||
1976 | early_start = late_start - (ii - 1); | |||
1977 | late_start = MIN (late_start, early_start + (ii - 1)); | |||
1978 | ||||
1979 | /* Apply memory dependence limits. */ | |||
1980 | start = MAX (start, early_start); | |||
1981 | end = MIN (end, late_start); | |||
1982 | ||||
1983 | if (dump_file && (psp_not_empty || pss_not_empty)) | |||
1984 | fprintf (dump_file, "%11s %11d %11d %11s %5s final window\n", | |||
1985 | "", start, end, "", ""); | |||
1986 | ||||
1987 | /* If there are at least as many successors as predecessors, schedule the | |||
1988 | node close to its successors. */ | |||
1989 | if (pss_not_empty && count_succs >= count_preds) | |||
1990 | { | |||
1991 | std::swap (start, end); | |||
1992 | step = -1; | |||
1993 | } | |||
1994 | ||||
1995 | /* Now that we've finalized the window, make END an exclusive rather | |||
1996 | than an inclusive bound. */ | |||
1997 | end += step; | |||
1998 | ||||
1999 | *start_p = start; | |||
2000 | *step_p = step; | |||
2001 | *end_p = end; | |||
2002 | ||||
2003 | if ((start >= end && step == 1) || (start <= end && step == -1)) | |||
2004 | { | |||
2005 | if (dump_file) | |||
2006 | fprintf (dump_file, "\nEmpty window: start=%d, end=%d, step=%d\n", | |||
2007 | start, end, step); | |||
2008 | return -1; | |||
2009 | } | |||
2010 | ||||
2011 | return 0; | |||
2012 | } | |||
2013 | ||||
2014 | /* Calculate MUST_PRECEDE/MUST_FOLLOW bitmaps of U_NODE, which is the | |||
2015 | node currently being scheduled. At the end of the calculation | |||
2016 | MUST_PRECEDE/MUST_FOLLOW contains all predecessors/successors of | |||
2017 | U_NODE which are (1) already scheduled in the first/last row of | |||
2018 | U_NODE's scheduling window, (2) whose dependence inequality with U | |||
2019 | becomes an equality when U is scheduled in this same row, and (3) | |||
2020 | whose dependence latency is zero. | |||
2021 | ||||
2022 | The first and last rows are calculated using the following parameters: | |||
2023 | START/END rows - The cycles that begin/end the traversal of the window | |||
2024 | while searching for an empty cycle in which to schedule U_NODE. | |||
2025 | STEP - The direction in which we traverse the window. | |||
2026 | II - The initiation interval. */ | |||
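| /* A worked instance (hypothetical numbers): suppose II = 4, the first | |||
| cycle of U_NODE's window is 6, and predecessor v is scheduled with | |||
| SCHED_TIME (v) = 10 over an edge with distance 1 and latency 0; then | |||
| 10 - 1*4 == 6, so v is put into MUST_PRECEDE and must be placed before | |||
| U_NODE within the row if U_NODE lands in that first cycle. */ | |||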
2027 | ||||
2028 | static void | |||
2029 | calculate_must_precede_follow (ddg_node_ptr u_node, int start, int end, | |||
2030 | int step, int ii, sbitmap sched_nodes, | |||
2031 | sbitmap must_precede, sbitmap must_follow) | |||
2032 | { | |||
2033 | ddg_edge_ptr e; | |||
2034 | int first_cycle_in_window, last_cycle_in_window; | |||
2035 | ||||
2036 | gcc_assert (must_precede && must_follow); | |||
2037 | ||||
2038 | /* Consider the following scheduling window: | |||
2039 | {first_cycle_in_window, first_cycle_in_window+1, ..., | |||
2040 | last_cycle_in_window}. If step is 1 then the following will be | |||
2041 | the order we traverse the window: {start=first_cycle_in_window, | |||
2042 | first_cycle_in_window+1, ..., end=last_cycle_in_window+1}, | |||
2043 | or {start=last_cycle_in_window, last_cycle_in_window-1, ..., | |||
2044 | end=first_cycle_in_window-1} if step is -1. */ | |||
2045 | first_cycle_in_window = (step == 1) ? start : end - step; | |||
2046 | last_cycle_in_window = (step == 1) ? end - step : start; | |||
2047 | ||||
2048 | bitmap_clear (must_precede); | |||
2049 | bitmap_clear (must_follow); | |||
2050 | ||||
2051 | if (dump_file) | |||
2052 | fprintf (dump_file, "\nmust_precede: "); | |||
2053 | ||||
2054 | /* Instead of checking if: | |||
2055 | (SMODULO (SCHED_TIME (e->src), ii) == first_row_in_window) | |||
2056 | && ((SCHED_TIME (e->src) + e->latency - (e->distance * ii)) == | |||
2057 | first_cycle_in_window) | |||
2058 | && e->latency == 0 | |||
2059 | we use the fact that latency is non-negative: | |||
2060 | SCHED_TIME (e->src) - (e->distance * ii) <= | |||
2061 | SCHED_TIME (e->src) + e->latency - (e->distance * ii)) <= | |||
2062 | first_cycle_in_window | |||
2063 | and check only if | |||
2064 | SCHED_TIME (e->src) - (e->distance * ii) == first_cycle_in_window */ | |||
2065 | for (e = u_node->in; e != 0; e = e->next_in) | |||
2066 | if (bitmap_bit_p (sched_nodes, e->src->cuid) | |||
2067 | && ((SCHED_TIME (e->src->cuid) - (e->distance * ii)) == | |||
2068 | first_cycle_in_window)) | |||
2069 | { | |||
2070 | if (dump_file) | |||
2071 | fprintf (dump_file, "%d ", e->src->cuid); | |||
2072 | ||||
2073 | bitmap_set_bit (must_precede, e->src->cuid); | |||
2074 | } | |||
2075 | ||||
2076 | if (dump_file) | |||
2077 | fprintf (dump_file, "\nmust_follow: "); | |||
2078 | ||||
2079 | /* Instead of checking if: | |||
2080 | (SMODULO (SCHED_TIME (e->dest), ii) == last_row_in_window) | |||
2081 | && ((SCHED_TIME (e->dest) - e->latency + (e->distance * ii)) == | |||
2082 | last_cycle_in_window) | |||
2083 | && e->latency == 0 | |||
2084 | we use the fact that latency is non-negative: | |||
2085 | SCHED_TIME (e->dest) + (e->distance * ii) >= | |||
2086 | SCHED_TIME (e->dest) - e->latency + (e->distance * ii)) >= | |||
2087 | last_cycle_in_window | |||
2088 | and check only if | |||
2089 | SCHED_TIME (e->dest) + (e->distance * ii) == last_cycle_in_window */ | |||
2090 | for (e = u_node->out; e != 0; e = e->next_out) | |||
2091 | if (bitmap_bit_p (sched_nodes, e->dest->cuid) | |||
2092 | && ((SCHED_TIME (e->dest->cuid) + (e->distance * ii)) == | |||
2093 | last_cycle_in_window)) | |||
2094 | { | |||
2095 | if (dump_file) | |||
2096 | fprintf (dump_file, "%d ", e->dest->cuid); | |||
2097 | ||||
2098 | bitmap_set_bit (must_follow, e->dest->cuid); | |||
2099 | } | |||
2100 | ||||
2101 | if (dump_file) | |||
2102 | fprintf (dump_file, "\n"); | |||
2103 | } | |||
2104 | ||||
2105 | /* Return 1 if U_NODE can be scheduled in CYCLE. Use the following | |||
2106 | parameters to decide if that's possible: | |||
2107 | PS - The partial schedule. | |||
2108 | U - The serial number of U_NODE. | |||
2109 | NUM_SPLITS - The number of row splits made so far. | |||
2110 | MUST_PRECEDE - The nodes that must precede U_NODE. (only valid at | |||
2111 | the first row of the scheduling window) | |||
2112 | MUST_FOLLOW - The nodes that must follow U_NODE. (only valid at the | |||
2113 | last row of the scheduling window) */ | |||
2114 | ||||
2115 | static bool | |||
2116 | try_scheduling_node_in_cycle (partial_schedule_ptr ps, | |||
2117 | int u, int cycle, sbitmap sched_nodes, | |||
2118 | int *num_splits, sbitmap must_precede, | |||
2119 | sbitmap must_follow) | |||
2120 | { | |||
2121 | ps_insn_ptr psi; | |||
2122 | bool success = 0; | |||
2123 | ||||
2124 | verify_partial_schedule (ps, sched_nodes); | |||
2125 | psi = ps_add_node_check_conflicts (ps, u, cycle, must_precede, must_follow); | |||
2126 | if (psi) | |||
2127 | { | |||
2128 | SCHED_TIME (u) = cycle; | |||
2129 | bitmap_set_bit (sched_nodes, u); | |||
2130 | success = 1; | |||
2131 | *num_splits = 0; | |||
2132 | if (dump_file) | |||
2133 | fprintf (dump_file, "Scheduled w/o split in %d\n", cycle); | |||
2134 | ||||
2135 | } | |||
2136 | ||||
2137 | return success; | |||
2138 | } | |||
2139 | ||||
2140 | /* This function implements the SMS scheduling algorithm described | |||
2141 | above. */ | |||
2142 | static partial_schedule_ptr | |||
2143 | sms_schedule_by_order (ddg_ptr g, int mii, int maxii, int *nodes_order) | |||
2144 | { | |||
2145 | int ii = mii; | |||
2146 | int i, c, success, num_splits = 0; | |||
2147 | int flush_and_start_over = true; | |||
2148 | int num_nodes = g->num_nodes; | |||
2149 | int start, end, step; /* Place together into one struct? */ | |||
2150 | auto_sbitmap sched_nodes (num_nodes); | |||
2151 | auto_sbitmap must_precede (num_nodes); | |||
2152 | auto_sbitmap must_follow (num_nodes); | |||
2153 | auto_sbitmap tobe_scheduled (num_nodes); | |||
2154 | ||||
2155 | /* Value of param_sms_dfa_history is a limit on the number of cycles that | |||
2156 | resource conflicts can span. ??? Should be provided by DFA, and be | |||
2157 | dependent on the type of insn scheduled. Set to 0 by default to save | |||
2158 | compile time. */ | |||
2159 | partial_schedule_ptr ps = create_partial_schedule (ii, g, | |||
2160 | param_sms_dfa_history); | |||
2161 | ||||
2162 | bitmap_ones (tobe_scheduled); | |||
2163 | bitmap_clear (sched_nodes); | |||
2164 | ||||
2165 | while (flush_and_start_over && (ii < maxii)) | |||
2166 | { | |||
2167 | ||||
2168 | if (dump_file) | |||
2169 | fprintf (dump_file, "Starting with ii=%d\n", ii); | |||
2170 | flush_and_start_over = false; | |||
2171 | bitmap_clear (sched_nodes); | |||
2172 | ||||
2173 | for (i = 0; i < num_nodes; i++) | |||
2174 | { | |||
2175 | int u = nodes_order[i]; | |||
2176 | ddg_node_ptr u_node = &ps->g->nodes[u]; | |||
2177 | rtx_insn *insn = u_node->insn; | |||
2178 | ||||
2179 | gcc_checking_assert (NONDEBUG_INSN_P (insn)); | |||
2180 | ||||
2181 | if (bitmap_bit_p (sched_nodes, u)) | |||
2182 | continue; | |||
2183 | ||||
2184 | /* Try to get non-empty scheduling window. */ | |||
2185 | success = 0; | |||
2186 | if (get_sched_window (ps, u_node, sched_nodes, ii, &start, | |||
2187 | &step, &end) == 0) | |||
2188 | { | |||
2189 | if (dump_file) | |||
2190 | fprintf (dump_file, "\nTrying to schedule node %d " | |||
2191 | "INSN = %d in (%d .. %d) step %d\n", u, (INSN_UID | |||
2192 | (g->nodes[u].insn)), start, end, step); | |||
2193 | ||||
2194 | gcc_assert ((step > 0 && start < end) | |||
2195 | || (step < 0 && start > end)); | |||
2196 | ||||
2197 | calculate_must_precede_follow (u_node, start, end, step, ii, | |||
2198 | sched_nodes, must_precede, | |||
2199 | must_follow); | |||
2200 | ||||
2201 | for (c = start; c != end; c += step) | |||
2202 | { | |||
2203 | sbitmap tmp_precede, tmp_follow; | |||
2204 | ||||
2205 | set_must_precede_follow (&tmp_follow, must_follow, | |||
2206 | &tmp_precede, must_precede, | |||
2207 | c, start, end, step); | |||
2208 | success = | |||
2209 | try_scheduling_node_in_cycle (ps, u, c, | |||
2210 | sched_nodes, | |||
2211 | &num_splits, tmp_precede, | |||
2212 | tmp_follow); | |||
2213 | if (success) | |||
2214 | break; | |||
2215 | } | |||
2216 | ||||
2217 | verify_partial_schedule (ps, sched_nodes); | |||
2218 | } | |||
2219 | if (!success) | |||
2220 | { | |||
2221 | int split_row; | |||
2222 | ||||
2223 | if (ii++ == maxii) | |||
2224 | break; | |||
2225 | ||||
2226 | if (num_splits >= MAX_SPLIT_NUM) | |||
2227 | { | |||
2228 | num_splits = 0; | |||
2229 | flush_and_start_over = true; | |||
2230 | verify_partial_schedule (ps, sched_nodes); | |||
2231 | reset_partial_schedule (ps, ii); | |||
2232 | verify_partial_schedule (ps, sched_nodes); | |||
2233 | break; | |||
2234 | } | |||
2235 | ||||
2236 | num_splits++; | |||
2237 | /* The scheduling window is exclusive of 'end' | |||
2238 | whereas compute_split_window() expects an inclusive, | |||
2239 | ordered range. */ | |||
2240 | if (step == 1) | |||
2241 | split_row = compute_split_row (sched_nodes, start, end - 1, | |||
2242 | ps->ii, u_node); | |||
2243 | else | |||
2244 | split_row = compute_split_row (sched_nodes, end + 1, start, | |||
2245 | ps->ii, u_node); | |||
2246 | ||||
2247 | ps_insert_empty_row (ps, split_row, sched_nodes); | |||
2248 | i--; /* Go back and retry node i. */ | |||
2249 | ||||
2250 | if (dump_file) | |||
2251 | fprintf (dump_file, "num_splits=%d\n", num_splits); | |||
2252 | } | |||
2253 | ||||
2254 | /* ??? If (success), check register pressure estimates. */ | |||
2255 | } /* Continue with next node. */ | |||
2256 | } /* While flush_and_start_over. */ | |||
2257 | if (ii >= maxii) | |||
2258 | { | |||
2259 | free_partial_schedule (ps); | |||
2260 | ps = NULL; | |||
2261 | } | |||
2262 | else | |||
2263 | gcc_assert (bitmap_equal_p (tobe_scheduled, sched_nodes)); | |||
2264 | ||||
2265 | return ps; | |||
2266 | } | |||
2267 | ||||
2268 | /* This function inserts a new empty row into PS at the position | |||
2269 | according to SPLITROW, keeping all already scheduled instructions | |||
2270 | intact and updating their SCHED_TIME and cycle accordingly. */ | |||
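| /* A worked instance (hypothetical numbers, assuming the schedule was | |||
| already normalized to min_cycle 0): for ii = 3 and split_row = 1, an | |||
| insn with SCHED_TIME 4 (row 1) is remapped to 4 + 4/3 + 1 = 6, i.e. | |||
| row 6 % 4 = 2 of the new ii = 4 schedule, while an insn at time 0 | |||
| (row 0, before the split) keeps time 0 and row 0. */ | |||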
2271 | static void | |||
2272 | ps_insert_empty_row (partial_schedule_ptr ps, int split_row, | |||
2273 | sbitmap sched_nodes) | |||
2274 | { | |||
2275 | ps_insn_ptr crr_insn; | |||
2276 | ps_insn_ptr *rows_new; | |||
2277 | int ii = ps->ii; | |||
2278 | int new_ii = ii + 1; | |||
2279 | int row; | |||
2280 | int *rows_length_new; | |||
2281 | ||||
2282 | verify_partial_schedule (ps, sched_nodes); | |||
2283 | ||||
2284 | /* We normalize sched_time and rotate ps to have only non-negative sched | |||
2285 | times, for simplicity of updating cycles after inserting new row. */ | |||
2286 | split_row -= ps->min_cycle; | |||
2287 | split_row = SMODULO (split_row, ii); | |||
2288 | if (dump_file) | |||
2289 | fprintf (dump_file, "split_row=%d\n", split_row); | |||
2290 | ||||
2291 | reset_sched_times (ps, PS_MIN_CYCLE (ps)); | |||
2292 | rotate_partial_schedule (ps, PS_MIN_CYCLE (ps)); | |||
2293 | ||||
2294 | rows_new = (ps_insn_ptr *) xcalloc (new_ii, sizeof (ps_insn_ptr)); | |||
2295 | rows_length_new = (int *) xcalloc (new_ii, sizeof (int)); | |||
2296 | for (row = 0; row < split_row; row++) | |||
2297 | { | |||
2298 | rows_new[row] = ps->rows[row]; | |||
2299 | rows_length_new[row] = ps->rows_length[row]; | |||
2300 | ps->rows[row] = NULL; | |||
2301 | for (crr_insn = rows_new[row]; | |||
2302 | crr_insn; crr_insn = crr_insn->next_in_row) | |||
2303 | { | |||
2304 | int u = crr_insn->id; | |||
2305 | int new_time = SCHED_TIME (u) + (SCHED_TIME (u) / ii); | |||
2306 | ||||
2307 | SCHED_TIME (u) = new_time; | |||
2308 | crr_insn->cycle = new_time; | |||
2309 | SCHED_ROW (u) = new_time % new_ii; | |||
2310 | SCHED_STAGE (u) = new_time / new_ii; | |||
2311 | } | |||
2312 | ||||
2313 | } | |||
2314 | ||||
2315 | rows_new[split_row] = NULL; | |||
2316 | ||||
2317 | for (row = split_row; row < ii; row++) | |||
2318 | { | |||
2319 | rows_new[row + 1] = ps->rows[row]; | |||
2320 | rows_length_new[row + 1] = ps->rows_length[row]; | |||
2321 | ps->rows[row] = NULL; | |||
2322 | for (crr_insn = rows_new[row + 1]; | |||
2323 | crr_insn; crr_insn = crr_insn->next_in_row) | |||
2324 | { | |||
2325 | int u = crr_insn->id; | |||
2326 | int new_time = SCHED_TIME (u) + (SCHED_TIME (u) / ii) + 1; | |||
2327 | ||||
2328 | SCHED_TIME (u) = new_time; | |||
2329 | crr_insn->cycle = new_time; | |||
2330 | SCHED_ROW (u) = new_time % new_ii; | |||
2331 | SCHED_STAGE (u) = new_time / new_ii; | |||
2332 | } | |||
2333 | } | |||
2334 | ||||
2335 | /* Updating ps. */ | |||
2336 | ps->min_cycle = ps->min_cycle + ps->min_cycle / ii | |||
2337 | + (SMODULO (ps->min_cycle, ii) >= split_row ? 1 : 0); | |||
2338 | ps->max_cycle = ps->max_cycle + ps->max_cycle / ii | |||
2339 | + (SMODULO (ps->max_cycle, ii) >= split_row ? 1 : 0); | |||
2340 | free (ps->rows); | |||
2341 | ps->rows = rows_new; | |||
2342 | free (ps->rows_length); | |||
2343 | ps->rows_length = rows_length_new; | |||
2344 | ps->ii = new_ii; | |||
2345 | gcc_assert (ps->min_cycle >= 0); | |||
2346 | ||||
2347 | verify_partial_schedule (ps, sched_nodes); | |||
2348 | ||||
2349 | if (dump_file) | |||
2350 | fprintf (dump_file, "min_cycle=%d, max_cycle=%d\n", ps->min_cycle, | |||
2351 | ps->max_cycle); | |||
2352 | } | |||
2353 | ||||
2354 | /* Given U_NODE, the node that failed to be scheduled, and LOW and UP, | |||
2355 | the boundaries of its scheduling window, compute using SCHED_NODES | |||
2356 | and II a row in the partial schedule that can be split so as to | |||
2357 | separate a critical predecessor from a critical successor, thereby | |||
2358 | expanding the window, and return it. */ | |||
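| /* A worked instance (hypothetical numbers): with II = 4 and a critical | |||
| predecessor scheduled at time 7 over an edge with latency 2 and | |||
| distance 0 (so it defines LOW = 9), crit_cycle is 8 and the row | |||
| returned is SMODULO (8, 4) = 0. SMODULO is the always-non-negative | |||
| modulo, e.g. SMODULO (-1, 4) = 3. */ | |||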
2359 | static int | |||
2360 | compute_split_row (sbitmap sched_nodes, int low, int up, int ii, | |||
2361 | ddg_node_ptr u_node) | |||
2362 | { | |||
2363 | ddg_edge_ptr e; | |||
2364 | int lower = INT_MIN, upper = INT_MAX; | |||
2365 | int crit_pred = -1; | |||
2366 | int crit_succ = -1; | |||
2367 | int crit_cycle; | |||
2368 | ||||
2369 | for (e = u_node->in; e != 0; e = e->next_in) | |||
2370 | { | |||
2371 | int v = e->src->cuid; | |||
2372 | ||||
2373 | if (bitmap_bit_p (sched_nodes, v) | |||
2374 | && (low == SCHED_TIME (v) + e->latency - (e->distance * ii))) | |||
2375 | if (SCHED_TIME (v) > lower) | |||
2376 | { | |||
2377 | crit_pred = v; | |||
2378 | lower = SCHED_TIME (v); | |||
2379 | } | |||
2380 | } | |||
2381 | ||||
2382 | if (crit_pred >= 0) | |||
2383 | { | |||
2384 | crit_cycle = SCHED_TIME (crit_pred) + 1; | |||
2385 | return SMODULO (crit_cycle, ii); | |||
2386 | } | |||
2387 | ||||
2388 | for (e = u_node->out; e != 0; e = e->next_out) | |||
2389 | { | |||
2390 | int v = e->dest->cuid; | |||
2391 | ||||
2392 | if (bitmap_bit_p (sched_nodes, v) | |||
2393 | && (up == SCHED_TIME (v) - e->latency + (e->distance * ii))) | |||
2394 | if (SCHED_TIME (v) < upper) | |||
2395 | { | |||
2396 | crit_succ = v; | |||
2397 | upper = SCHED_TIME (v); | |||
2398 | } | |||
2399 | } | |||
2400 | ||||
2401 | if (crit_succ >= 0) | |||
2402 | { | |||
2403 | crit_cycle = SCHED_TIME (crit_succ); | |||
2404 | return SMODULO (crit_cycle, ii); | |||
2405 | } | |||
2406 | ||||
2407 | if (dump_file) | |||
2408 | fprintf (dump_file, "Both crit_pred and crit_succ are NULL\n"); | |||
2409 | ||||
2410 | return SMODULO ((low + up + 1) / 2, ii); | |||
2411 | } | |||
2412 | ||||
2413 | static void | |||
2414 | verify_partial_schedule (partial_schedule_ptr ps, sbitmap sched_nodes) | |||
2415 | { | |||
2416 | int row; | |||
2417 | ps_insn_ptr crr_insn; | |||
2418 | ||||
2419 | for (row = 0; row < ps->ii; row++) | |||
2420 | { | |||
2421 | int length = 0; | |||
2422 | ||||
2423 | for (crr_insn = ps->rows[row]; crr_insn; crr_insn = crr_insn->next_in_row) | |||
2424 | { | |||
2425 | int u = crr_insn->id; | |||
2426 | ||||
2427 | length++; | |||
2428 | gcc_assert (bitmap_bit_p (sched_nodes, u)); | |||
2429 | /* ??? Test also that all nodes of sched_nodes are in ps, perhaps by | |||
2430 | popcount (sched_nodes) == number of insns in ps. */ | |||
2431 | gcc_assert (SCHED_TIME (u) >= ps->min_cycle); | |||
2432 | gcc_assert (SCHED_TIME (u) <= ps->max_cycle); | |||
2433 | } | |||
2434 | ||||
2435 | gcc_assert (ps->rows_length[row] == length); | |||
2436 | } | |||
2437 | } | |||
2438 | ||||
2439 | ||||
2440 | /* This page implements the algorithm for ordering the nodes of a DDG | |||
2441 | for modulo scheduling, activated through the | |||
2442 | "int sms_order_nodes (ddg_ptr, int mii, int * result)" API. */ | |||
2443 | ||||
2444 | #define ORDER_PARAMS(x) ((struct node_order_params *) (x)->aux.info) | |||
2445 | #define ASAP(x) (ORDER_PARAMS ((x))->asap) | |||
2446 | #define ALAP(x) (ORDER_PARAMS ((x))->alap) | |||
2447 | #define HEIGHT(x) (ORDER_PARAMS ((x))->height) | |||
2448 | #define MOB(x) (ALAP ((x)) - ASAP ((x))) | |||
2449 | #define DEPTH(x) (ASAP ((x))) | |||
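/* Added illustration (editorial, hypothetical numbers): for a three-insn
   chain a -> b -> c with unit-latency edges and no other dependences,
   calculate_order_params below gives ASAP = 0, 1, 2; with max_asap = 2 the
   backward pass gives ALAP = 0, 1, 2, so MOB (= ALAP - ASAP) is 0 for every
   node, and HEIGHT (the longest latency path to a sink) is 2, 1, 0.  */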
2450 | ||||
2451 | typedef struct node_order_params * nopa; | |||
2452 | ||||
2453 | static void order_nodes_of_sccs (ddg_all_sccs_ptr, int * result); | |||
2454 | static int order_nodes_in_scc (ddg_ptr, sbitmap, sbitmap, int*, int); | |||
2455 | static nopa calculate_order_params (ddg_ptr, int, int *); | |||
2456 | static int find_max_asap (ddg_ptr, sbitmap); | |||
2457 | static int find_max_hv_min_mob (ddg_ptr, sbitmap); | |||
2458 | static int find_max_dv_min_mob (ddg_ptr, sbitmap); | |||
2459 | ||||
2460 | enum sms_direction {BOTTOMUP, TOPDOWN}; | |||
2461 | ||||
2462 | struct node_order_params | |||
2463 | { | |||
2464 | int asap; | |||
2465 | int alap; | |||
2466 | int height; | |||
2467 | }; | |||
2468 | ||||
2469 | /* Check if NODE_ORDER contains a permutation of 0 .. NUM_NODES-1. */ | |||
2470 | static void | |||
2471 | check_nodes_order (int *node_order, int num_nodes) | |||
2472 | { | |||
2473 | int i; | |||
2474 | auto_sbitmap tmp (num_nodes); | |||
2475 | ||||
2476 | bitmap_clear (tmp); | |||
2477 | ||||
2478 | if (dump_file) | |||
2479 | fprintf (dump_file, "SMS final nodes order: \n"); | |||
2480 | ||||
2481 | for (i = 0; i < num_nodes; i++) | |||
2482 | { | |||
2483 | int u = node_order[i]; | |||
2484 | ||||
2485 | if (dump_file) | |||
2486 | fprintf (dump_file, "%d ", u); | |||
2487 | gcc_assert (u < num_nodes && u >= 0 && !bitmap_bit_p (tmp, u)); | |||
2488 | ||||
2489 | bitmap_set_bit (tmp, u); | |||
2490 | } | |||
2491 | ||||
2492 | if (dump_file) | |||
2493 | fprintf (dump_file, "\n"); | |||
2494 | } | |||
2495 | ||||
2496 | /* Order the nodes of G for scheduling and pass the result in | |||
2497 | NODE_ORDER. Also set aux.count of each node to ASAP. | |||
2498 | Put maximal ASAP to PMAX_ASAP. Return the recMII for the given DDG. */ | |||
2499 | static int | |||
2500 | sms_order_nodes (ddg_ptr g, int mii, int * node_order, int *pmax_asap) | |||
2501 | { | |||
2502 | int i; | |||
2503 | int rec_mii = 0; | |||
2504 | ddg_all_sccs_ptr sccs = create_ddg_all_sccs (g); | |||
2505 | ||||
2506 | nopa nops = calculate_order_params (g, mii, pmax_asap); | |||
2507 | ||||
2508 | if (dump_file) | |||
2509 | print_sccs (dump_file, sccs, g); | |||
2510 | ||||
2511 | order_nodes_of_sccs (sccs, node_order); | |||
2512 | ||||
2513 | if (sccs->num_sccs > 0) | |||
2514 | /* First SCC has the largest recurrence_length. */ | |||
2515 | rec_mii = sccs->sccs[0]->recurrence_length; | |||
2516 | ||||
2517 | /* Save ASAP before destroying node_order_params. */ | |||
2518 | for (i = 0; i < g->num_nodes; i++) | |||
2519 | { | |||
2520 | ddg_node_ptr v = &g->nodes[i]; | |||
2521 | v->aux.count = ASAP (v); | |||
2522 | } | |||
2523 | ||||
2524 | free (nops); | |||
2525 | free_ddg_all_sccs (sccs); | |||
2526 | check_nodes_order (node_order, g->num_nodes); | |||
2527 | ||||
2528 | return rec_mii; | |||
2529 | } | |||
2530 | ||||
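/* Added description (editorial): order the nodes SCC by SCC.  Each SCC is
   first extended with the nodes lying on paths between the already-ordered
   nodes and the SCC, and the extended set is then ordered by
   order_nodes_in_scc; nodes that belong to no SCC are handled afterwards,
   one connected component per call.  */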
2531 | static void | |||
2532 | order_nodes_of_sccs (ddg_all_sccs_ptr all_sccs, int * node_order) | |||
2533 | { | |||
2534 | int i, pos = 0; | |||
2535 | ddg_ptr g = all_sccs->ddg; | |||
2536 | int num_nodes = g->num_nodes; | |||
2537 | auto_sbitmap prev_sccs (num_nodes); | |||
2538 | auto_sbitmap on_path (num_nodes); | |||
2539 | auto_sbitmap tmp (num_nodes); | |||
2540 | auto_sbitmap ones (num_nodes); | |||
2541 | ||||
2542 | bitmap_clear (prev_sccs); | |||
2543 | bitmap_ones (ones); | |||
2544 | ||||
2545 | /* Perform the node ordering starting from the SCC with the highest recMII. | |||
2546 | For each SCC order the nodes according to their ASAP/ALAP/HEIGHT etc. */ | |||
2547 | for (i = 0; i < all_sccs->num_sccs; i++) | |||
2548 | { | |||
2549 | ddg_scc_ptr scc = all_sccs->sccs[i]; | |||
2550 | ||||
2551 | /* Add nodes on paths from previous SCCs to the current SCC. */ | |||
2552 | find_nodes_on_paths (on_path, g, prev_sccs, scc->nodes); | |||
2553 | bitmap_ior (tmp, scc->nodes, on_path); | |||
2554 | ||||
2555 | /* Add nodes on paths from the current SCC to previous SCCs. */ | |||
2556 | find_nodes_on_paths (on_path, g, scc->nodes, prev_sccs); | |||
2557 | bitmap_ior (tmp, tmp, on_path); | |||
2558 | ||||
2559 | /* Remove nodes of previous SCCs from current extended SCC. */ | |||
2560 | bitmap_and_compl (tmp, tmp, prev_sccs); | |||
2561 | ||||
2562 | pos = order_nodes_in_scc (g, prev_sccs, tmp, node_order, pos); | |||
2563 | /* Above call to order_nodes_in_scc updated prev_sccs |= tmp. */ | |||
2564 | } | |||
2565 | ||||
2566 | /* Handle the remaining nodes that do not belong to any scc. Each call | |||
2567 | to order_nodes_in_scc handles a single connected component. */ | |||
2568 | while (pos < g->num_nodes) | |||
2569 | { | |||
2570 | bitmap_and_compl (tmp, ones, prev_sccs); | |||
2571 | pos = order_nodes_in_scc (g, prev_sccs, tmp, node_order, pos); | |||
2572 | } | |||
2573 | } | |||
2574 | ||||
2575 | /* MII is needed if we consider backarcs (that do not close recursive cycles). */ | |||
2576 | static struct node_order_params * | |||
2577 | calculate_order_params (ddg_ptr g, int mii ATTRIBUTE_UNUSED, int *pmax_asap) | |||
2578 | { | |||
2579 | int u; | |||
2580 | int max_asap; | |||
2581 | int num_nodes = g->num_nodes; | |||
2582 | ddg_edge_ptr e; | |||
2583 | /* Allocate a place to hold ordering params for each node in the DDG. */ | |||
2584 | nopa node_order_params_arr; | |||
2585 | ||||
2586 | /* Initialize ASAP/ALAP/HEIGHT to zero. */ | |||
2587 | node_order_params_arr = (nopa) xcalloc (num_nodes, | |||
2588 | sizeof (struct node_order_params)); | |||
2589 | ||||
2590 | /* Set the aux pointer of each node to point to its order_params structure. */ | |||
2591 | for (u = 0; u < num_nodes; u++) | |||
2592 | g->nodes[u].aux.info = &node_order_params_arr[u]; | |||
2593 | ||||
2594 | /* Disregarding a backarc from each recursive cycle to obtain a DAG, | |||
2595 | calculate ASAP, ALAP, mobility, distance, and height for each node | |||
2596 | in the dependence (directed acyclic) graph. */ | |||
2597 | ||||
2598 | /* We assume that the nodes in the array are in topological order. */ | |||
2599 | ||||
2600 | max_asap = 0; | |||
2601 | for (u = 0; u < num_nodes; u++) | |||
2602 | { | |||
2603 | ddg_node_ptr u_node = &g->nodes[u]; | |||
2604 | ||||
2605 | ASAP (u_node) = 0; | |||
2606 | for (e = u_node->in; e; e = e->next_in) | |||
2607 | if (e->distance == 0) | |||
2608 | ASAP (u_node) = MAX (ASAP (u_node), | |||
2609 | ASAP (e->src) + e->latency); | |||
2610 | max_asap = MAX (max_asap, ASAP (u_node)); | |||
2611 | } | |||
2612 | ||||
2613 | for (u = num_nodes - 1; u > -1; u--) | |||
2614 | { | |||
2615 | ddg_node_ptr u_node = &g->nodes[u]; | |||
2616 | ||||
2617 | ALAP (u_node) = max_asap; | |||
2618 | HEIGHT (u_node) = 0; | |||
2619 | for (e = u_node->out; e; e = e->next_out) | |||
2620 | if (e->distance == 0) | |||
2621 | { | |||
2622 | ALAP (u_node) = MIN (ALAP (u_node), | |||
2623 | ALAP (e->dest) - e->latency); | |||
2624 | HEIGHT (u_node) = MAX (HEIGHT (u_node), | |||
2625 | HEIGHT (e->dest) + e->latency); | |||
2626 | } | |||
2627 | } | |||
2628 | if (dump_file) | |||
2629 | { | |||
2630 | fprintf (dump_file, "\nOrder params\n"); | |||
2631 | for (u = 0; u < num_nodes; u++) | |||
2632 | { | |||
2633 | ddg_node_ptr u_node = &g->nodes[u]; | |||
2634 | ||||
2635 | fprintf (dump_file, "node %d, ASAP: %d, ALAP: %d, HEIGHT: %d\n", u, | |||
2636 | ASAP (u_node), ALAP (u_node), HEIGHT (u_node)); | |||
2637 | } | |||
2638 | } | |||
2639 | ||||
2640 | *pmax_asap = max_asap; | |||
2641 | return node_order_params_arr; | |||
2642 | } | |||
2643 | ||||
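/* Added description (editorial): return the node in NODES with the maximal
   ASAP value, or -1 if NODES is empty.  */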
2644 | static int | |||
2645 | find_max_asap (ddg_ptr g, sbitmap nodes) | |||
2646 | { | |||
2647 | unsigned int u = 0; | |||
2648 | int max_asap = -1; | |||
2649 | int result = -1; | |||
2650 | sbitmap_iterator sbi; | |||
2651 | ||||
2652 | EXECUTE_IF_SET_IN_BITMAP (nodes, 0, u, sbi) | |||
2653 | { | |||
2654 | ddg_node_ptr u_node = &g->nodes[u]; | |||
2655 | ||||
2656 | if (max_asap < ASAP (u_node)) | |||
2657 | { | |||
2658 | max_asap = ASAP (u_node); | |||
2659 | result = u; | |||
2660 | } | |||
2661 | } | |||
2662 | return result; | |||
2663 | } | |||
2664 | ||||
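/* Added description (editorial): return the node in NODES with the maximal
   HEIGHT, breaking ties by minimal mobility (MOB); -1 if NODES is empty.  */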
2665 | static int | |||
2666 | find_max_hv_min_mob (ddg_ptr g, sbitmap nodes) | |||
2667 | { | |||
2668 | unsigned int u = 0; | |||
2669 | int max_hv = -1; | |||
2670 | int min_mob = INT_MAX; | |||
2671 | int result = -1; | |||
2672 | sbitmap_iterator sbi; | |||
2673 | ||||
2674 | EXECUTE_IF_SET_IN_BITMAP (nodes, 0, u, sbi) | |||
2675 | { | |||
2676 | ddg_node_ptr u_node = &g->nodes[u]; | |||
2677 | ||||
2678 | if (max_hv < HEIGHT (u_node)) | |||
2679 | { | |||
2680 | max_hv = HEIGHT (u_node); | |||
2681 | min_mob = MOB (u_node); | |||
2682 | result = u; | |||
2683 | } | |||
2684 | else if ((max_hv == HEIGHT (u_node)) | |||
2685 | && (min_mob > MOB (u_node))) | |||
2686 | { | |||
2687 | min_mob = MOB (u_node); | |||
2688 | result = u; | |||
2689 | } | |||
2690 | } | |||
2691 | return result; | |||
2692 | } | |||
2693 | ||||
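/* Added description (editorial): return the node in NODES with the maximal
   DEPTH (ASAP), breaking ties by minimal mobility (MOB); -1 if NODES is
   empty.  */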
2694 | static int | |||
2695 | find_max_dv_min_mob (ddg_ptr g, sbitmap nodes) | |||
2696 | { | |||
2697 | unsigned int u = 0; | |||
2698 | int max_dv = -1; | |||
2699 | int min_mob = INT_MAX; | |||
2700 | int result = -1; | |||
2701 | sbitmap_iterator sbi; | |||
2702 | ||||
2703 | EXECUTE_IF_SET_IN_BITMAP (nodes, 0, u, sbi) | |||
2704 | { | |||
2705 | ddg_node_ptr u_node = &g->nodes[u]; | |||
2706 | ||||
2707 | if (max_dv < DEPTH (u_node)) | |||
2708 | { | |||
2709 | max_dv = DEPTH (u_node); | |||
2710 | min_mob = MOB (u_node); | |||
2711 | result = u; | |||
2712 | } | |||
2713 | else if ((max_dv == DEPTH (u_node)) | |||
2714 | && (min_mob > MOB (u_node))) | |||
2715 | { | |||
2716 | min_mob = MOB (u_node); | |||
2717 | result = u; | |||
2718 | } | |||
2719 | } | |||
2720 | return result; | |||
2721 | } | |||
2722 | ||||
2723 | /* Places the nodes of SCC into the NODE_ORDER array starting | |||
2724 | at position POS, according to the SMS ordering algorithm. | |||
2725 | NODES_ORDERED (in&out parameter) holds the bitset of all nodes in | |||
2726 | the NODE_ORDER array, starting from position zero. */ | |||
2727 | static int | |||
2728 | order_nodes_in_scc (ddg_ptr g, sbitmap nodes_ordered, sbitmap scc, | |||
2729 | int * node_order, int pos) | |||
2730 | { | |||
2731 | enum sms_direction dir; | |||
2732 | int num_nodes = g->num_nodes; | |||
2733 | auto_sbitmap workset (num_nodes); | |||
2734 | auto_sbitmap tmp (num_nodes); | |||
2735 | sbitmap zero_bitmap = sbitmap_alloc (num_nodes); | |||
2736 | auto_sbitmap predecessors (num_nodes); | |||
2737 | auto_sbitmap successors (num_nodes); | |||
2738 | ||||
2739 | bitmap_clear (predecessors); | |||
2740 | find_predecessors (predecessors, g, nodes_ordered); | |||
2741 | ||||
2742 | bitmap_clear (successors); | |||
2743 | find_successors (successors, g, nodes_ordered); | |||
2744 | ||||
2745 | bitmap_clear (tmp); | |||
2746 | if (bitmap_and (tmp, predecessors, scc)) | |||
2747 | { | |||
2748 | bitmap_copy (workset, tmp); | |||
2749 | dir = BOTTOMUP; | |||
2750 | } | |||
2751 | else if (bitmap_and (tmp, successors, scc)) | |||
2752 | { | |||
2753 | bitmap_copy (workset, tmp); | |||
2754 | dir = TOPDOWN; | |||
2755 | } | |||
2756 | else | |||
2757 | { | |||
2758 | int u; | |||
2759 | ||||
2760 | bitmap_clear (workset); | |||
2761 | if ((u = find_max_asap (g, scc)) >= 0) | |||
2762 | bitmap_set_bit (workset, u); | |||
2763 | dir = BOTTOMUP; | |||
2764 | } | |||
2765 | ||||
2766 | bitmap_clear (zero_bitmap); | |||
2767 | while (!bitmap_equal_p (workset, zero_bitmap)) | |||
2768 | { | |||
2769 | int v; | |||
2770 | ddg_node_ptr v_node; | |||
2771 | sbitmap v_node_preds; | |||
2772 | sbitmap v_node_succs; | |||
2773 | ||||
2774 | if (dir == TOPDOWN) | |||
2775 | { | |||
2776 | while (!bitmap_equal_p (workset, zero_bitmap)) | |||
2777 | { | |||
2778 | v = find_max_hv_min_mob (g, workset); | |||
2779 | v_node = &g->nodes[v]; | |||
2780 | node_order[pos++] = v; | |||
2781 | v_node_succs = NODE_SUCCESSORS (v_node); | |||
2782 | bitmap_and (tmp, v_node_succs, scc); | |||
2783 | ||||
2784 | /* Don't consider the already ordered successors again. */ | |||
2785 | bitmap_and_compl (tmp, tmp, nodes_ordered); | |||
2786 | bitmap_ior (workset, workset, tmp); | |||
2787 | bitmap_clear_bit (workset, v); | |||
2788 | bitmap_set_bit (nodes_ordered, v); | |||
2789 | } | |||
2790 | dir = BOTTOMUP; | |||
2791 | bitmap_clear (predecessors); | |||
2792 | find_predecessors (predecessors, g, nodes_ordered); | |||
2793 | bitmap_and (workset, predecessors, scc); | |||
2794 | } | |||
2795 | else | |||
2796 | { | |||
2797 | while (!bitmap_equal_p (workset, zero_bitmap)) | |||
2798 | { | |||
2799 | v = find_max_dv_min_mob (g, workset); | |||
2800 | v_node = &g->nodes[v]; | |||
2801 | node_order[pos++] = v; | |||
2802 | v_node_preds = NODE_PREDECESSORS (v_node); | |||
2803 | bitmap_and (tmp, v_node_preds, scc); | |||
2804 | ||||
2805 | /* Don't consider the already ordered predecessors again. */ | |||
2806 | bitmap_and_compl (tmp, tmp, nodes_ordered); | |||
2807 | bitmap_ior (workset, workset, tmp); | |||
2808 | bitmap_clear_bit (workset, v); | |||
2809 | bitmap_set_bit (nodes_ordered, v); | |||
2810 | } | |||
2811 | dir = TOPDOWN; | |||
2812 | bitmap_clear (successors); | |||
2813 | find_successors (successors, g, nodes_ordered); | |||
2814 | bitmap_and (workset, successors, scc); | |||
2815 | } | |||
2816 | } | |||
2817 | sbitmap_free (zero_bitmap); | |||
2818 | return pos; | |||
2819 | } | |||
2820 | ||||
2821 | ||||
2822 | /* This page contains functions for manipulating partial-schedules during | |||
2823 | modulo scheduling. */ | |||
2824 | ||||
2825 | /* Create a partial schedule and allocate memory to hold II rows. */ | |||
2826 | ||||
2827 | static partial_schedule_ptr | |||
2828 | create_partial_schedule (int ii, ddg_ptr g, int history) | |||
2829 | { | |||
2830 | partial_schedule_ptr ps = XNEW (struct partial_schedule); | |||
2831 | ps->rows = (ps_insn_ptr *) xcalloc (ii, sizeof (ps_insn_ptr)); | |||
2832 | ps->rows_length = (int *) xcalloc (ii, sizeof (int)); | |||
2833 | ps->reg_moves.create (0); | |||
2834 | ps->ii = ii; | |||
2835 | ps->history = history; | |||
2836 | ps->min_cycle = INT_MAX; | |||
2837 | ps->max_cycle = INT_MIN; | |||
2838 | ps->g = g; | |||
2839 | ||||
2840 | return ps; | |||
2841 | } | |||
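/* Added usage note (editorial sketch, based only on the functions in this
   page): a typical lifetime is create_partial_schedule (mii, g, history) to
   build an empty schedule, ps_add_node_check_conflicts () to place each
   insn, reset_partial_schedule (ps, new_ii) when a larger II must be tried,
   and free_partial_schedule (ps) once scheduling is finished.  */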
2842 | ||||
2843 | /* Free the PS_INSNs in the rows array of the given partial schedule. | |||
2844 | ??? Consider caching the PS_INSN's. */ | |||
2845 | static void | |||
2846 | free_ps_insns (partial_schedule_ptr ps) | |||
2847 | { | |||
2848 | int i; | |||
2849 | ||||
2850 | for (i = 0; i < ps->ii; i++) | |||
2851 | { | |||
2852 | while (ps->rows[i]) | |||
2853 | { | |||
2854 | ps_insn_ptr ps_insn = ps->rows[i]->next_in_row; | |||
2855 | ||||
2856 | free (ps->rows[i]); | |||
2857 | ps->rows[i] = ps_insn; | |||
2858 | } | |||
2859 | ps->rows[i] = NULL; | |||
2860 | } | |||
2861 | } | |||
2862 | ||||
2863 | /* Free all the memory allocated to the partial schedule. */ | |||
2864 | ||||
2865 | static void | |||
2866 | free_partial_schedule (partial_schedule_ptr ps) | |||
2867 | { | |||
2868 | ps_reg_move_info *move; | |||
2869 | unsigned int i; | |||
2870 | ||||
2871 | if (!ps) | |||
2872 | return; | |||
2873 | ||||
2874 | FOR_EACH_VEC_ELT (ps->reg_moves, i, move) | |||
2875 | sbitmap_free (move->uses); | |||
2876 | ps->reg_moves.release (); | |||
2877 | ||||
2878 | free_ps_insns (ps); | |||
2879 | free (ps->rows); | |||
2880 | free (ps->rows_length); | |||
2881 | free (ps); | |||
2882 | } | |||
2883 | ||||
2884 | /* Clear the rows array with its PS_INSNs, and create a new one with | |||
2885 | NEW_II rows. */ | |||
2886 | ||||
2887 | static void | |||
2888 | reset_partial_schedule (partial_schedule_ptr ps, int new_ii) | |||
2889 | { | |||
2890 | if (!ps) | |||
2891 | return; | |||
2892 | free_ps_insns (ps); | |||
2893 | if (new_ii == ps->ii) | |||
2894 | return; | |||
2895 | ps->rows = (ps_insn_ptr *) xrealloc (ps->rows, new_ii | |||
2896 | * sizeof (ps_insn_ptr)); | |||
2897 | memset (ps->rows, 0, new_ii * sizeof (ps_insn_ptr)); | |||
2898 | ps->rows_length = (int *) xrealloc (ps->rows_length, new_ii * sizeof (int)); | |||
2899 | memset (ps->rows_length, 0, new_ii * sizeof (int)); | |||
2900 | ps->ii = new_ii; | |||
2901 | ps->min_cycle = INT_MAX; | |||
2902 | ps->max_cycle = INT_MIN; | |||
2903 | } | |||
2904 | ||||
2905 | /* Print the partial schedule as an array of II rows; for each row | |||
2906 | print the ids of the insns in it. */ | |||
2907 | void | |||
2908 | print_partial_schedule (partial_schedule_ptr ps, FILE *dump) | |||
2909 | { | |||
2910 | int i; | |||
2911 | ||||
2912 | for (i = 0; i < ps->ii; i++) | |||
2913 | { | |||
2914 | ps_insn_ptr ps_i = ps->rows[i]; | |||
2915 | ||||
2916 | fprintf (dump, "\n[ROW %d ]: ", i); | |||
2917 | while (ps_i) | |||
2918 | { | |||
2919 | rtx_insn *insn = ps_rtl_insn (ps, ps_i->id); | |||
2920 | ||||
2921 | if (JUMP_P (insn)) | |||
2922 | fprintf (dump, "%d (branch), ", INSN_UID (insn)); | |||
2923 | else | |||
2924 | fprintf (dump, "%d, ", INSN_UID (insn)); | |||
2925 | ||||
2926 | ps_i = ps_i->next_in_row; | |||
2927 | } | |||
2928 | } | |||
2929 | } | |||
2930 | ||||
2931 | /* Creates an object of PS_INSN and initializes it to the given parameters. */ | |||
2932 | static ps_insn_ptr | |||
2933 | create_ps_insn (int id, int cycle) | |||
2934 | { | |||
2935 | ps_insn_ptr ps_i = XNEW (struct ps_insn); | |||
2936 | ||||
2937 | ps_i->id = id; | |||
2938 | ps_i->next_in_row = NULL; | |||
2939 | ps_i->prev_in_row = NULL; | |||
2940 | ps_i->cycle = cycle; | |||
2941 | ||||
2942 | return ps_i; | |||
2943 | } | |||
2944 | ||||
2945 | ||||
2946 | /* Removes the given PS_INSN from the partial schedule. */ | |||
2947 | static void | |||
2948 | remove_node_from_ps (partial_schedule_ptr ps, ps_insn_ptr ps_i) | |||
2949 | { | |||
2950 | int row; | |||
2951 | ||||
2952 | gcc_assert (ps && ps_i); | |||
2953 | ||||
2954 | row = SMODULO (ps_i->cycle, ps->ii); | |||
2955 | if (! ps_i->prev_in_row) | |||
2956 | { | |||
2957 | gcc_assert (ps_i == ps->rows[row]); | |||
2958 | ps->rows[row] = ps_i->next_in_row; | |||
2959 | if (ps->rows[row]) | |||
2960 | ps->rows[row]->prev_in_row = NULL; | |||
2961 | } | |||
2962 | else | |||
2963 | { | |||
2964 | ps_i->prev_in_row->next_in_row = ps_i->next_in_row; | |||
2965 | if (ps_i->next_in_row) | |||
2966 | ps_i->next_in_row->prev_in_row = ps_i->prev_in_row; | |||
2967 | } | |||
2968 | ||||
2969 | ps->rows_length[row] -= 1; | |||
2970 | free (ps_i); | |||
2971 | return; | |||
2972 | } | |||
2973 | ||||
2974 | /* Unlike what literature describes for modulo scheduling (which focuses | |||
2975 | on VLIW machines) the order of the instructions inside a cycle is | |||
2976 | important. Given the bitmaps MUST_FOLLOW and MUST_PRECEDE we know | |||
2977 | where the current instruction should go relative to the already | |||
2978 | scheduled instructions in the given cycle. Go over these | |||
2979 | instructions and find the first possible column to put it in. */ | |||
2980 | static bool | |||
2981 | ps_insn_find_column (partial_schedule_ptr ps, ps_insn_ptr ps_i, | |||
2982 | sbitmap must_precede, sbitmap must_follow) | |||
2983 | { | |||
2984 | ps_insn_ptr next_ps_i; | |||
2985 | ps_insn_ptr first_must_follow = NULL; | |||
2986 | ps_insn_ptr last_must_precede = NULL; | |||
2987 | ps_insn_ptr last_in_row = NULL; | |||
2988 | int row; | |||
2989 | ||||
2990 | if (! ps_i) | |||
2991 | return false; | |||
2992 | ||||
2993 | row = SMODULO (ps_i->cycle, ps->ii); | |||
2994 | ||||
2995 | /* Find the first must follow and the last must precede | |||
2996 | and insert the node immediately after the must precede | |||
2997 | but make sure that there is no must follow after it. */ | |||
2998 | for (next_ps_i = ps->rows[row]; | |||
2999 | next_ps_i; | |||
3000 | next_ps_i = next_ps_i->next_in_row) | |||
3001 | { | |||
3002 | if (must_follow | |||
3003 | && bitmap_bit_p (must_follow, next_ps_i->id) | |||
3004 | && ! first_must_follow) | |||
3005 | first_must_follow = next_ps_i; | |||
3006 | if (must_precede && bitmap_bit_p (must_precede, next_ps_i->id)) | |||
3007 | { | |||
3008 | /* If we have already met a node that must follow, then | |||
3009 | there is no possible column. */ | |||
3010 | if (first_must_follow) | |||
3011 | return false; | |||
3012 | else | |||
3013 | last_must_precede = next_ps_i; | |||
3014 | } | |||
3015 | /* The closing branch must be the last in the row. */ | |||
3016 | if (JUMP_P (ps_rtl_insn (ps, next_ps_i->id))) | |||
3017 | return false; | |||
3018 | ||||
3019 | last_in_row = next_ps_i; | |||
3020 | } | |||
3021 | ||||
3022 | /* The closing branch is scheduled as well. Make sure there is no | |||
3023 | dependent instruction after it as the branch should be the last | |||
3024 | instruction in the row. */ | |||
3025 | if (JUMP_P (ps_rtl_insn (ps, ps_i->id))) | |||
3026 | { | |||
3027 | if (first_must_follow) | |||
3028 | return false; | |||
3029 | if (last_in_row) | |||
3030 | { | |||
3031 | /* Make the branch the last in the row. New instructions | |||
3032 | will be inserted at the beginning of the row or after the | |||
3033 | last must_precede instruction thus the branch is guaranteed | |||
3034 | to remain the last instruction in the row. */ | |||
3035 | last_in_row->next_in_row = ps_i; | |||
3036 | ps_i->prev_in_row = last_in_row; | |||
3037 | ps_i->next_in_row = NULL; | |||
3038 | } | |||
3039 | else | |||
3040 | ps->rows[row] = ps_i; | |||
3041 | return true; | |||
3042 | } | |||
3043 | ||||
3044 | /* Now insert the node after the last must_precede insn, or at the start of the row. */ | |||
3045 | ||||
3046 | if (! last_must_precede) | |||
3047 | { | |||
3048 | ps_i->next_in_row = ps->rows[row]; | |||
3049 | ps_i->prev_in_row = NULL; | |||
3050 | if (ps_i->next_in_row) | |||
3051 | ps_i->next_in_row->prev_in_row = ps_i; | |||
3052 | ps->rows[row] = ps_i; | |||
3053 | } | |||
3054 | else | |||
3055 | { | |||
3056 | ps_i->next_in_row = last_must_precede->next_in_row; | |||
3057 | last_must_precede->next_in_row = ps_i; | |||
3058 | ps_i->prev_in_row = last_must_precede; | |||
3059 | if (ps_i->next_in_row) | |||
3060 | ps_i->next_in_row->prev_in_row = ps_i; | |||
3061 | } | |||
3062 | ||||
3063 | return true; | |||
3064 | } | |||
3065 | ||||
3066 | /* Advance the PS_INSN one column in its current row; return false | |||
3067 | on failure and true on success. Bit N is set in MUST_FOLLOW if | |||
3068 | the node with cuid N must come after the node pointed to by | |||
3069 | PS_I when scheduled in the same cycle. */ | |||
3070 | static int | |||
3071 | ps_insn_advance_column (partial_schedule_ptr ps, ps_insn_ptr ps_i, | |||
3072 | sbitmap must_follow) | |||
3073 | { | |||
3074 | ps_insn_ptr prev, next; | |||
3075 | int row; | |||
3076 | ||||
3077 | if (!ps || !ps_i) | |||
3078 | return false; | |||
3079 | ||||
3080 | row = SMODULO (ps_i->cycle, ps->ii); | |||
3081 | ||||
3082 | if (! ps_i->next_in_row) | |||
3083 | return false; | |||
3084 | ||||
3085 | /* Check if next_in_row is dependent on ps_i, both having same sched | |||
3086 | times (typically ANTI_DEP). If so, ps_i cannot skip over it. */ | |||
3087 | if (must_follow && bitmap_bit_p (must_follow, ps_i->next_in_row->id)) | |||
3088 | return false; | |||
3089 | ||||
3090 | /* Advance PS_I over its next_in_row in the doubly linked list. */ | |||
3091 | prev = ps_i->prev_in_row; | |||
3092 | next = ps_i->next_in_row; | |||
3093 | ||||
3094 | if (ps_i == ps->rows[row]) | |||
3095 | ps->rows[row] = next; | |||
3096 | ||||
3097 | ps_i->next_in_row = next->next_in_row; | |||
3098 | ||||
3099 | if (next->next_in_row) | |||
3100 | next->next_in_row->prev_in_row = ps_i; | |||
3101 | ||||
3102 | next->next_in_row = ps_i; | |||
3103 | ps_i->prev_in_row = next; | |||
3104 | ||||
3105 | next->prev_in_row = prev; | |||
3106 | if (prev) | |||
3107 | prev->next_in_row = next; | |||
3108 | ||||
3109 | return true; | |||
3110 | } | |||
3111 | ||||
3112 | /* Inserts a DDG_NODE to the given partial schedule at the given cycle. | |||
3113 | Returns 0 if this is not possible and a PS_INSN otherwise. Bit N is | |||
3114 | set in MUST_PRECEDE/MUST_FOLLOW if the node with cuid N must come | |||
3115 | before/after (respectively) the node pointed to by PS_I when scheduled | |||
3116 | in the same cycle. */ | |||
3117 | static ps_insn_ptr | |||
3118 | add_node_to_ps (partial_schedule_ptr ps, int id, int cycle, | |||
3119 | sbitmap must_precede, sbitmap must_follow) | |||
3120 | { | |||
3121 | ps_insn_ptr ps_i; | |||
3122 | int row = SMODULO (cycle, ps->ii); | |||
3123 | ||||
3124 | if (ps->rows_length[row] >= issue_rate) | |||
3125 | return NULL; | |||
3126 | ||||
3127 | ps_i = create_ps_insn (id, cycle); | |||
3128 | ||||
3129 | /* Finds and inserts PS_I according to MUST_FOLLOW and | |||
3130 | MUST_PRECEDE. */ | |||
3131 | if (! ps_insn_find_column (ps, ps_i, must_precede, must_follow)) | |||
3132 | { | |||
3133 | free (ps_i); | |||
3134 | return NULL; | |||
3135 | } | |||
3136 | ||||
3137 | ps->rows_length[row] += 1; | |||
3138 | return ps_i; | |||
3139 | } | |||
3140 | ||||
3141 | /* Advance time one cycle. Assumes DFA is being used. */ | |||
3142 | static void | |||
3143 | advance_one_cycle (void) | |||
3144 | { | |||
3145 | if (targetm.sched.dfa_pre_cycle_insn) | |||
3146 | state_transition (curr_state, | |||
3147 | targetm.sched.dfa_pre_cycle_insn ()); | |||
3148 | ||||
3149 | state_transition (curr_state, NULL); | |||
3150 | ||||
3151 | if (targetm.sched.dfa_post_cycle_insn) | |||
3152 | state_transition (curr_state, | |||
3153 | targetm.sched.dfa_post_cycle_insn ()); | |||
3154 | } | |||
3155 | ||||
3156 | ||||
3157 | ||||
3158 | /* Checks if PS has resource conflicts according to DFA, starting from | |||
3159 | FROM cycle to TO cycle; returns true if there are conflicts and false | |||
3160 | if there are no conflicts. Assumes DFA is being used. */ | |||
3161 | static int | |||
3162 | ps_has_conflicts (partial_schedule_ptr ps, int from, int to) | |||
3163 | { | |||
3164 | int cycle; | |||
3165 | ||||
3166 | state_reset (curr_state); | |||
3167 | ||||
3168 | for (cycle = from; cycle <= to; cycle++) | |||
3169 | { | |||
3170 | ps_insn_ptr crr_insn; | |||
3171 | /* Holds the remaining issue slots in the current row. */ | |||
3172 | int can_issue_more = issue_rate; | |||
3173 | ||||
3174 | /* Walk through the DFA for the current row. */ | |||
3175 | for (crr_insn = ps->rows[SMODULO (cycle, ps->ii)]; | |||
3176 | crr_insn; | |||
3177 | crr_insn = crr_insn->next_in_row) | |||
3178 | { | |||
3179 | rtx_insn *insn = ps_rtl_insn (ps, crr_insn->id); | |||
3180 | ||||
3181 | /* Check if there is room for the current insn. */ | |||
3182 | if (!can_issue_more || state_dead_lock_p (curr_state)) | |||
3183 | return true; | |||
3184 | ||||
3185 | /* Update the DFA state and return with failure if the DFA found | |||
3186 | resource conflicts. */ | |||
3187 | if (state_transition (curr_state, insn) >= 0) | |||
3188 | return true; | |||
3189 | ||||
3190 | if (targetm.sched.variable_issue) | |||
3191 | can_issue_more = | |||
3192 | targetm.sched.variable_issue (sched_dump, sched_verbose, | |||
3193 | insn, can_issue_more); | |||
3194 | /* A naked CLOBBER or USE generates no instruction, so don't | |||
3195 | let them consume issue slots. */ | |||
3196 | else if (GET_CODE (PATTERN (insn)) != USE | |||
3197 | && GET_CODE (PATTERN (insn)) != CLOBBER) | |||
3198 | can_issue_more--; | |||
3199 | } | |||
3200 | ||||
3201 | /* Advance the DFA to the next cycle. */ | |||
3202 | advance_one_cycle (); | |||
3203 | } | |||
3204 | return false; | |||
3205 | } | |||
3206 | ||||
3207 | /* Checks if the given node causes resource conflicts when added to PS at | |||
3208 | cycle C. If not, the node is added to PS and returned; otherwise zero | |||
3209 | is returned. Bit N is set in MUST_PRECEDE/MUST_FOLLOW if the node with | |||
3210 | cuid N must come before/after (respectively) the node pointed to by | |||
3211 | PS_I when scheduled in the same cycle. */ | |||
3212 | ps_insn_ptr | |||
3213 | ps_add_node_check_conflicts (partial_schedule_ptr ps, int n, | |||
3214 | int c, sbitmap must_precede, | |||
3215 | sbitmap must_follow) | |||
3216 | { | |||
3217 | int i, first, amount, has_conflicts = 0; | |||
3218 | ps_insn_ptr ps_i; | |||
3219 | ||||
3220 | /* First add the node to the PS, if this succeeds check for | |||
3221 | conflicts, trying different issue slots in the same row. */ | |||
3222 | if (! (ps_i = add_node_to_ps (ps, n, c, must_precede, must_follow))) | |||
3223 | return NULL; /* Failed to insert the node at the given cycle. */ | |||
3224 | ||||
3225 | while (1) | |||
3226 | { | |||
3227 | has_conflicts = ps_has_conflicts (ps, c, c); | |||
3228 | if (ps->history > 0 && !has_conflicts) | |||
3229 | { | |||
3230 | /* Check all 2h+1 intervals, starting from c-2h..c up to c..c+2h, | |||
3231 | but not more than ii intervals. */ | |||
3232 | first = c - ps->history; | |||
3233 | amount = 2 * ps->history + 1; | |||
3234 | if (amount > ps->ii) | |||
3235 | amount = ps->ii; | |||
3236 | for (i = first; i < first + amount; i++) | |||
3237 | { | |||
3238 | has_conflicts = ps_has_conflicts (ps, | |||
3239 | i - ps->history, | |||
3240 | i + ps->history); | |||
3241 | if (has_conflicts) | |||
3242 | break; | |||
3243 | } | |||
3244 | } | |||
3245 | if (!has_conflicts) | |||
3246 | break; | |||
3247 | /* Try different issue slots to find one that the given node can be | |||
3248 | scheduled in without conflicts. */ | |||
3249 | if (! ps_insn_advance_column (ps, ps_i, must_follow)) | |||
3250 | break; | |||
3251 | } | |||
3252 | ||||
3253 | if (has_conflicts) | |||
3254 | { | |||
3255 | remove_node_from_ps (ps, ps_i); | |||
3256 | return NULL; | |||
3257 | } | |||
3258 | ||||
3259 | ps->min_cycle = MIN (ps->min_cycle, c); | |||
3260 | ps->max_cycle = MAX (ps->max_cycle, c); | |||
3261 | return ps_i; | |||
3262 | } | |||
3263 | ||||
3264 | /* Calculate the stage count of the partial schedule PS. The calculation | |||
3265 | takes into account the rotation amount passed in ROTATION_AMOUNT. */ | |||
3266 | int | |||
3267 | calculate_stage_count (partial_schedule_ptr ps, int rotation_amount) | |||
3268 | { | |||
3269 | int new_min_cycle = PS_MIN_CYCLE (ps) - rotation_amount; | |||
3270 | int new_max_cycle = PS_MAX_CYCLE (ps) - rotation_amount; | |||
3271 | int stage_count = CALC_STAGE_COUNT (-1, new_min_cycle, ps->ii); | |||
3272 | ||||
3273 | /* The calculation of stage count is done adding the number of stages | |||
3274 | before cycle zero and after cycle zero. */ | |||
3275 | stage_count += CALC_STAGE_COUNT (new_max_cycle, 0, ps->ii); | |||
3276 | ||||
3277 | return stage_count; | |||
3278 | } | |||
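/* Added illustration (editorial, hypothetical numbers): with ps->ii = 4,
   PS_MIN_CYCLE = 2, PS_MAX_CYCLE = 10 and rotation_amount = 5 the rotated
   range is [-3, 5]; the stages before cycle zero contribute
   CALC_STAGE_COUNT (-1, -3, 4) = (-1 - (-3) + 1 + 3) / 4 = 1 and the stages
   from cycle zero contribute CALC_STAGE_COUNT (5, 0, 4) = (5 + 1 + 3) / 4
   = 2, for a stage count of 3.  */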
3279 | ||||
3280 | /* Rotate the rows of PS such that insns scheduled at time | |||
3281 | START_CYCLE will appear in row 0. Updates max/min_cycles. */ | |||
3282 | void | |||
3283 | rotate_partial_schedule (partial_schedule_ptr ps, int start_cycle) | |||
3284 | { | |||
3285 | int i, row, backward_rotates; | |||
3286 | int last_row = ps->ii - 1; | |||
3287 | ||||
3288 | if (start_cycle == 0) | |||
3289 | return; | |||
3290 | ||||
3291 | backward_rotates = SMODULO (start_cycle, ps->ii); | |||
3292 | ||||
3293 | /* Revisit later and optimize this into a single loop. */ | |||
3294 | for (i = 0; i < backward_rotates; i++) | |||
3295 | { | |||
3296 | ps_insn_ptr first_row = ps->rows[0]; | |||
3297 | int first_row_length = ps->rows_length[0]; | |||
3298 | ||||
3299 | for (row = 0; row < last_row; row++) | |||
3300 | { | |||
3301 | ps->rows[row] = ps->rows[row + 1]; | |||
3302 | ps->rows_length[row] = ps->rows_length[row + 1]; | |||
3303 | } | |||
3304 | ||||
3305 | ps->rows[last_row] = first_row; | |||
3306 | ps->rows_length[last_row] = first_row_length; | |||
3307 | } | |||
3308 | ||||
3309 | ps->max_cycle -= start_cycle; | |||
3310 | ps->min_cycle -= start_cycle; | |||
3311 | } | |||
3312 | ||||
3313 | #endif /* INSN_SCHEDULING */ | |||
3314 | ||||
3315 | /* Run the instruction scheduler. */ | |||
3316 | /* Perform SMS modulo scheduling. */ | |||
3317 | ||||
3318 | namespace { | |||
3319 | ||||
3320 | const pass_data pass_data_sms = | |||
3321 | { | |||
3322 | RTL_PASS, /* type */ | |||
3323 | "sms", /* name */ | |||
3324 | OPTGROUP_NONE, /* optinfo_flags */ | |||
3325 | TV_SMS, /* tv_id */ | |||
3326 | 0, /* properties_required */ | |||
3327 | 0, /* properties_provided */ | |||
3328 | 0, /* properties_destroyed */ | |||
3329 | 0, /* todo_flags_start */ | |||
3330 | TODO_df_finish, /* todo_flags_finish */ | |||
3331 | }; | |||
3332 | ||||
3333 | class pass_sms : public rtl_opt_pass | |||
3334 | { | |||
3335 | public: | |||
3336 | pass_sms (gcc::context *ctxt) | |||
3337 | : rtl_opt_pass (pass_data_sms, ctxt) | |||
3338 | {} | |||
3339 | ||||
3340 | /* opt_pass methods: */ | |||
3341 | bool gate (function *) final override | |||
3342 | { | |||
3343 | return (optimize > 0 && flag_modulo_sched); | |||
3344 | } | |||
3345 | ||||
3346 | unsigned int execute (function *) final override; | |||
3347 | ||||
3348 | }; // class pass_sms | |||
3349 | ||||
3350 | unsigned int | |||
3351 | pass_sms::execute (function *fun ATTRIBUTE_UNUSED) | |||
3352 | { | |||
3353 | #ifdef INSN_SCHEDULING | |||
3354 | basic_block bb; | |||
3355 | ||||
3356 | /* Collect loop information to be used in SMS. */ | |||
3357 | cfg_layout_initialize (0); | |||
3358 | sms_schedule (); | |||
3359 | ||||
3360 | /* Update the life information, because we add pseudos. */ | |||
3361 | max_regno = max_reg_num (); | |||
3362 | ||||
3363 | /* Finalize layout changes. */ | |||
3364 | FOR_EACH_BB_FN (bb, fun) | |||
3365 | if (bb->next_bb != EXIT_BLOCK_PTR_FOR_FN (fun)) | |||
3366 | bb->aux = bb->next_bb; | |||
3367 | free_dominance_info (CDI_DOMINATORS); | |||
3368 | cfg_layout_finalize (); | |||
3369 | #endif /* INSN_SCHEDULING */ | |||
3370 | return 0; | |||
3371 | } | |||
3372 | ||||
3373 | } // anon namespace | |||
3374 | ||||
3375 | rtl_opt_pass * | |||
3376 | make_pass_sms (gcc::context *ctxt) | |||
3377 | { | |||
3378 | return new pass_sms (ctxt); | |||
3379 | } |