901-backport-fix-for-bug-32044.patch (5.8 KB)

  1. Index: gcc-4.3.2/gcc/tree-scalar-evolution.c
  2. ===================================================================
  3. --- gcc-4.3.2.orig/gcc/tree-scalar-evolution.c 2009-01-28 10:14:37.000000000 +0100
  4. +++ gcc-4.3.2/gcc/tree-scalar-evolution.c 2009-01-28 10:17:50.000000000 +0100
  5. @@ -2716,6 +2716,50 @@
  6. scalar_evolution_info = NULL;
  7. }
  8. +/* Returns true if the expression EXPR is considered to be too expensive
  9. + for scev_const_prop. A gimple value is cheap. A unary, binary or comparison node is cheap only if all of its operands are cheap and it is not a division or modulo by something other than a power of two. Every other kind of node is expensive. */
  10. +
  11. +bool
  12. +expression_expensive_p (tree expr)
  13. +{
  14. + enum tree_code code;
  15. +
  16. + if (is_gimple_val (expr))
  17. + return false;
  18. +
  19. + code = TREE_CODE (expr);
  20. + if (code == TRUNC_DIV_EXPR
  21. + || code == CEIL_DIV_EXPR
  22. + || code == FLOOR_DIV_EXPR
  23. + || code == ROUND_DIV_EXPR
  24. + || code == TRUNC_MOD_EXPR
  25. + || code == CEIL_MOD_EXPR
  26. + || code == FLOOR_MOD_EXPR
  27. + || code == ROUND_MOD_EXPR
  28. + || code == EXACT_DIV_EXPR)
  29. + {
  30. + /* Division by power of two is usually cheap, so we allow it.
  31. + Forbid anything else. An allowed division is not accepted outright: its operands are still checked by the switch below. */
  32. + if (!integer_pow2p (TREE_OPERAND (expr, 1)))
  33. + return true;
  34. + }
  35. +
  36. + switch (TREE_CODE_CLASS (code))
  37. + {
  38. + case tcc_binary:
  39. + case tcc_comparison:
  40. + if (expression_expensive_p (TREE_OPERAND (expr, 1)))
  41. + return true;
  42. +
  43. + /* Fallthru - operand 0 is checked exactly as for unary codes. */
  44. + case tcc_unary:
  45. + return expression_expensive_p (TREE_OPERAND (expr, 0));
  46. +
  47. + default: /* Not a gimple value and not unary/binary/comparison: treat as expensive. */
  48. + return true;
  49. + }
  50. +}
  51. +
  52. /* Replace ssa names for that scev can prove they are constant by the
  53. appropriate constants. Also perform final value replacement in loops,
  54. in case the replacement expressions are cheap.
  55. @@ -2802,12 +2846,6 @@
  56. continue;
  57. niter = number_of_latch_executions (loop);
  58. - /* We used to check here whether the computation of NITER is expensive,
  59. - and avoided final value elimination if that is the case. The problem
  60. - is that it is hard to evaluate whether the expression is too
  61. - expensive, as we do not know what optimization opportunities the
  62. - the elimination of the final value may reveal. Therefore, we now
  63. - eliminate the final values of induction variables unconditionally. */
  64. if (niter == chrec_dont_know)
  65. continue;
  66. @@ -2838,7 +2876,15 @@
  67. /* Moving the computation from the loop may prolong life range
  68. of some ssa names, which may cause problems if they appear
  69. on abnormal edges. */
  70. - || contains_abnormal_ssa_name_p (def))
  71. + || contains_abnormal_ssa_name_p (def)
  72. + /* Do not emit expensive expressions. The rationale is that
  73. + when someone writes code like
  74. +
  75. + while (n > 45) n -= 45;
  76. +
  77. + he probably knows that n is not large, and does not want it
  78. + to be turned into n %= 45. */
  79. + || expression_expensive_p (def))
  80. continue;
  81. /* Eliminate the PHI node and replace it by a computation outside
  82. Index: gcc-4.3.2/gcc/tree-scalar-evolution.h
  83. ===================================================================
  84. --- gcc-4.3.2.orig/gcc/tree-scalar-evolution.h 2009-01-28 10:22:47.000000000 +0100
  85. +++ gcc-4.3.2/gcc/tree-scalar-evolution.h 2009-01-28 10:23:10.000000000 +0100
  86. @@ -35,6 +35,7 @@
  87. extern void scev_analysis (void);
  88. unsigned int scev_const_prop (void);
  89. +bool expression_expensive_p (tree);
  90. extern bool simple_iv (struct loop *, tree, tree, affine_iv *, bool);
  91. /* Returns the loop of the polynomial chrec CHREC. */
  92. Index: gcc-4.3.2/gcc/testsuite/gcc.dg/pr34027-1.c
  93. ===================================================================
  94. --- gcc-4.3.2.orig/gcc/testsuite/gcc.dg/pr34027-1.c 2009-01-28 10:24:09.000000000 +0100
  95. +++ gcc-4.3.2/gcc/testsuite/gcc.dg/pr34027-1.c 2009-01-28 10:24:43.000000000 +0100
  96. @@ -8,5 +8,9 @@
  97. return ns;
  98. }
  99. -/* { dg-final { scan-tree-dump "ns % 10000" "optimized" } } */
  100. +/* This test was originally introduced to test that we transform
  101. + to ns % 10000. See the discussion of PR 32044 for why we no longer
  102. + do that. */
  103. +/* { dg-final { scan-tree-dump-times "%" 0 "optimized" } } */
  104. +/* { dg-final { scan-tree-dump-times "/" 0 "optimized" } } */
  105. /* { dg-final { cleanup-tree-dump "optimized" } } */
  106. Index: gcc-4.3.2/gcc/testsuite/gcc.dg/tree-ssa/pr32044.c
  107. ===================================================================
  108. --- /dev/null 1970-01-01 00:00:00.000000000 +0000
  109. +++ gcc-4.3.2/gcc/testsuite/gcc.dg/tree-ssa/pr32044.c 2009-01-28 10:25:50.000000000 +0100
  110. @@ -0,0 +1,55 @@
  111. +/* { dg-do compile } */
  112. +/* { dg-options "-O2 -fdump-tree-empty -fdump-tree-final_cleanup" } */
  113. +
  114. +int foo (int n) /* Subtracts 45 (not a power of two) from N while N >= 45 and returns what is left; the dg-final scans below require that no "%" or "/" appears in the final dump. */
  115. +{
  116. + while (n >= 45)
  117. + n -= 45;
  118. +
  119. + return n;
  120. +}
  121. +
  122. +int bar (int n) /* Same shape as foo but with 64, a power of two; per the dg-final comments below, loops computing division/modulo by 64 should be eliminated. */
  123. +{
  124. + while (n >= 64)
  125. + n -= 64;
  126. +
  127. + return n;
  128. +}
  129. +
  130. +int bla (int n) /* Returns how many times 45 is subtracted from N while N >= 45 (0 when N < 45); 45 is not a power of two. */
  131. +{
  132. + int i = 0;
  133. +
  134. + while (n >= 45)
  135. + {
  136. + i++;
  137. + n -= 45;
  138. + }
  139. +
  140. + return i;
  141. +}
  142. +
  143. +int baz (int n) /* Same shape as bla but with 64, a power of two; per the dg-final comments below, loops computing division/modulo by 64 should be eliminated. */
  144. +{
  145. + int i = 0;
  146. +
  147. + while (n >= 64)
  148. + {
  149. + i++;
  150. + n -= 64;
  151. + }
  152. +
  153. + return i;
  154. +}
  155. +
  156. +/* The loops computing division/modulo by 64 should be eliminated. */
  157. +/* { dg-final { scan-tree-dump-times "Removing empty loop" 2 "empty" } } */
  158. +
  159. +/* There should be no division/modulo in the final dump (division and modulo
  160. + by 64 are done using bit operations). */
  161. +/* { dg-final { scan-tree-dump-times "/" 0 "final_cleanup" } } */
  162. +/* { dg-final { scan-tree-dump-times "%" 0 "final_cleanup" } } */
  163. +
  164. +/* { dg-final { cleanup-tree-dump "empty" } } */
  165. +/* { dg-final { cleanup-tree-dump "final_cleanup" } } */
  166. Index: gcc-4.3.2/gcc/tree-ssa-loop-ivopts.c
  167. ===================================================================
  168. --- gcc-4.3.2.orig/gcc/tree-ssa-loop-ivopts.c 2009-01-28 10:26:04.000000000 +0100
  169. +++ gcc-4.3.2/gcc/tree-ssa-loop-ivopts.c 2009-01-28 10:27:09.000000000 +0100
  170. @@ -3778,7 +3778,12 @@
  171. return false;
  172. cand_value_at (loop, cand, use->stmt, nit, &bnd);
  173. +
  174. *bound = aff_combination_to_tree (&bnd);
  175. + /* It is unlikely that computing the number of iterations using division
  176. + would be more profitable than keeping the original induction variable. */
  177. + if (expression_expensive_p (*bound))
  178. + return false;
  179. return true;
  180. }