PR45273 - The compiler depends on the host double
Steven Bosscher
stevenb.gcc@gmail.com
Wed Mar 16 23:25:00 GMT 2011
Hi,
The attached patch replace sreal math with mpfr math in predict.c and
host double math in mcf.c also with mpfr math.
Bootstrapped&tested (incl. ada) on x86_64-unknown-linux-gnu. OK for trunk?
Anyone a better suggestion for PRED_PRECISION?
Ciao!
Steven
-------------- next part --------------
* sreal.c: Remove file.
* sreal.h: Remove file.
* Makefile.in: Remove/adjust make rules.
* predict.c: Include gmp.h and mpfr.h.
(PROB_PRECISION): Define.
(struct block_info_def): Replace sreal with mpfr_t.
(struct edge_info_def): Likewise.
(propagate_freq): Perform math with mpfr. Use FOR_ALL_BB instead
of loops over all basic blocks.
(counts_to_freq): Likewise.
(estimate_bb_frequencies): Likewise.
* mcf.c: Include gmp.h and mpfr.h.
(PROB_PRECISION): Define.
(K_POS, K_NEG, COST): Remove defines.
(K_POS_FACT, K_NEG_FACT): New defines.
(fixup_cost): Implement COST as function with mpfr math.
(mcf_ln): Remove function.
(mcf_sqrt): Idem, with apolies to John Carmack.
(create_fixup_graph): Use mpfr math.
Index: Makefile.in
===================================================================
--- Makefile.in (revision 170907)
+++ Makefile.in (working copy)
@@ -1347,7 +1347,6 @@ OBJS-common = \
sese.o \
simplify-rtx.o \
sparseset.o \
- sreal.o \
stack-ptr-mod.o \
statistics.o \
stmt.o \
@@ -3418,11 +3417,10 @@ reg-stack.o : reg-stack.c $(CONFIG_H) $(SYSTEM_H)
insn-config.h reload.h $(FUNCTION_H) $(TM_P_H) $(GGC_H) \
$(BASIC_BLOCK_H) $(CFGLAYOUT_H) output.h $(TIMEVAR_H) \
$(TREE_PASS_H) $(TARGET_H) vecprim.h $(DF_H) $(EMIT_RTL_H)
-sreal.o: sreal.c $(CONFIG_H) $(SYSTEM_H) coretypes.h sreal.h
predict.o: predict.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) \
$(TREE_H) $(FLAGS_H) insn-config.h $(BASIC_BLOCK_H) $(REGS_H) \
hard-reg-set.h output.h $(DIAGNOSTIC_CORE_H) $(RECOG_H) $(FUNCTION_H) $(EXCEPT_H) \
- $(TM_P_H) $(PREDICT_H) sreal.h $(PARAMS_H) $(TARGET_H) $(CFGLOOP_H) \
+ $(TM_P_H) $(PREDICT_H) $(PARAMS_H) $(TARGET_H) $(CFGLOOP_H) \
$(COVERAGE_H) $(SCEV_H) $(GGC_H) predict.def $(TIMEVAR_H) $(TREE_DUMP_H) \
$(TREE_FLOW_H) $(TREE_PASS_H) $(EXPR_H) pointer-set.h
lists.o: lists.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(DIAGNOSTIC_CORE_H) \
Index: predict.c
===================================================================
--- predict.c (revision 170907)
+++ predict.c (working copy)
@@ -27,7 +27,6 @@ along with GCC; see the file COPYING3. If not see
[3] "Corpus-based Static Branch Prediction"
Calder, Grunwald, Lindsay, Martin, Mozer, and Zorn; PLDI '95. */
-
#include "config.h"
#include "system.h"
#include "coretypes.h"
@@ -48,7 +47,6 @@ along with GCC; see the file COPYING3. If not see
#include "expr.h"
#include "predict.h"
#include "coverage.h"
-#include "sreal.h"
#include "params.h"
#include "target.h"
#include "cfgloop.h"
@@ -61,11 +59,17 @@ along with GCC; see the file COPYING3. If not see
#include "cfgloop.h"
#include "pointer-set.h"
-/* real constants: 0, 1, 1-1/REG_BR_PROB_BASE, REG_BR_PROB_BASE,
- 1/REG_BR_PROB_BASE, 0.5, BB_FREQ_MAX. */
-static sreal real_zero, real_one, real_almost_one, real_br_prob_base,
- real_inv_br_prob_base, real_one_half, real_bb_freq_max;
+/* Floating point computations are involved in frequency and probability
+ manipulations, but GCC must not depend on the host floating point format.
+ Therefore MPFR is used for all floating point math. */
+#include <gmp.h>
+#include <mpfr.h>
+/* Using greater precision than the host's widest integer is a recepe for
+ random profile inconsistencies due to excess precision. */
+#undef PROB_PRECISION
+#define PROB_PRECISION HOST_BITS_PER_WIDE_INT
+
/* Random guesstimation given names.
PROV_VERY_UNLIKELY should be small enough so basic block predicted
by it gets bellow HOT_BB_FREQUENCY_FRANCTION. */
@@ -1877,7 +1881,7 @@ predict_paths_leading_to_edge (edge e, enum br_pre
typedef struct block_info_def
{
/* Estimated frequency of execution of basic_block. */
- sreal frequency;
+ mpfr_t frequency;
/* To keep queue of basic blocks to process. */
basic_block next;
@@ -1892,7 +1896,7 @@ typedef struct edge_info_def
/* In case edge is a loopback edge, the probability edge will be reached
in case header is. Estimated number of iterations of the loop can be
then computed as 1 / (1 - back_edge_prob). */
- sreal back_edge_prob;
+ mpfr_t back_edge_prob;
/* True if the edge is a loopback edge in the natural loop. */
unsigned int back_edge:1;
} *edge_info;
@@ -1913,6 +1917,9 @@ propagate_freq (basic_block head, bitmap tovisit)
edge e;
basic_block nextbb;
bitmap_iterator bi;
+ mpfr_t real_almost_one; /* constant: 1 - 1/REG_BR_PROB_BASE. */
+ mpfr_t cyclic_probability, frequency;
+ mpfr_t tmp;
/* For each basic block we need to visit count number of his predecessors
we need to visit first. */
@@ -1940,15 +1947,21 @@ propagate_freq (basic_block head, bitmap tovisit)
bb->count = bb->frequency = 0;
}
- memcpy (&BLOCK_INFO (head)->frequency, &real_one, sizeof (real_one));
+ mpfr_inits2 (PROB_PRECISION,
+ real_almost_one, cyclic_probability, frequency, tmp, NULL);
+
+ mpfr_set_si (tmp, REG_BR_PROB_BASE, GMP_RNDN);
+ mpfr_si_div (tmp, 1, tmp, GMP_RNDN);
+ mpfr_si_sub (real_almost_one, 1, tmp, GMP_RNDN);
+
+ mpfr_set_si (BLOCK_INFO (head)->frequency, 1, GMP_RNDN);
last = head;
for (bb = head; bb; bb = nextbb)
{
edge_iterator ei;
- sreal cyclic_probability, frequency;
- memcpy (&cyclic_probability, &real_zero, sizeof (real_zero));
- memcpy (&frequency, &real_zero, sizeof (real_zero));
+ mpfr_set_si (cyclic_probability, 0, GMP_RNDN);
+ mpfr_set_si (frequency, 0, GMP_RNDN);
nextbb = BLOCK_INFO (bb)->next;
BLOCK_INFO (bb)->next = NULL;
@@ -1965,42 +1978,39 @@ propagate_freq (basic_block head, bitmap tovisit)
FOR_EACH_EDGE (e, ei, bb->preds)
if (EDGE_INFO (e)->back_edge)
{
- sreal_add (&cyclic_probability, &cyclic_probability,
- &EDGE_INFO (e)->back_edge_prob);
+ mpfr_add (cyclic_probability, cyclic_probability,
+ EDGE_INFO (e)->back_edge_prob, GMP_RNDN);
}
else if (!(e->flags & EDGE_DFS_BACK))
{
- sreal tmp;
-
/* frequency += (e->probability
* BLOCK_INFO (e->src)->frequency /
REG_BR_PROB_BASE); */
- sreal_init (&tmp, e->probability, 0);
- sreal_mul (&tmp, &tmp, &BLOCK_INFO (e->src)->frequency);
- sreal_mul (&tmp, &tmp, &real_inv_br_prob_base);
- sreal_add (&frequency, &frequency, &tmp);
+ mpfr_set_si (tmp, e->probability, GMP_RNDN);
+ mpfr_mul (tmp,
+ tmp, BLOCK_INFO (e->src)->frequency,
+ GMP_RNDN);
+ mpfr_div_si (tmp, tmp, REG_BR_PROB_BASE, GMP_RNDN);
+ mpfr_add (frequency, frequency, tmp, GMP_RNDN);
}
- if (sreal_compare (&cyclic_probability, &real_zero) == 0)
- {
- memcpy (&BLOCK_INFO (bb)->frequency, &frequency,
- sizeof (frequency));
- }
+ if (mpfr_cmp_si (cyclic_probability, 0) == 0)
+ mpfr_set (BLOCK_INFO (bb)->frequency, frequency, GMP_RNDN);
else
{
- if (sreal_compare (&cyclic_probability, &real_almost_one) > 0)
- {
- memcpy (&cyclic_probability, &real_almost_one,
- sizeof (real_almost_one));
- }
+ if (mpfr_cmp (cyclic_probability, real_almost_one) > 0)
+ mpfr_set (cyclic_probability, real_almost_one, GMP_RNDN);
/* BLOCK_INFO (bb)->frequency = frequency
/ (1 - cyclic_probability) */
- sreal_sub (&cyclic_probability, &real_one, &cyclic_probability);
- sreal_div (&BLOCK_INFO (bb)->frequency,
- &frequency, &cyclic_probability);
+ mpfr_si_sub (cyclic_probability,
+ 1, cyclic_probability,
+ GMP_RNDN);
+ mpfr_div (BLOCK_INFO (bb)->frequency,
+ frequency, cyclic_probability,
+ GMP_RNDN);
}
}
@@ -2009,16 +2019,15 @@ propagate_freq (basic_block head, bitmap tovisit)
e = find_edge (bb, head);
if (e)
{
- sreal tmp;
-
/* EDGE_INFO (e)->back_edge_prob
= ((e->probability * BLOCK_INFO (bb)->frequency)
/ REG_BR_PROB_BASE); */
- sreal_init (&tmp, e->probability, 0);
- sreal_mul (&tmp, &tmp, &BLOCK_INFO (bb)->frequency);
- sreal_mul (&EDGE_INFO (e)->back_edge_prob,
- &tmp, &real_inv_br_prob_base);
+ mpfr_set_si (tmp, e->probability, GMP_RNDN);
+ mpfr_mul (tmp, tmp, BLOCK_INFO (bb)->frequency, GMP_RNDN);
+ mpfr_div_si (EDGE_INFO (e)->back_edge_prob,
+ tmp, REG_BR_PROB_BASE,
+ GMP_RNDN);
}
/* Propagate to successor blocks. */
@@ -2038,6 +2047,8 @@ propagate_freq (basic_block head, bitmap tovisit)
}
}
}
+
+ mpfr_clears (real_almost_one, cyclic_probability, frequency, tmp, NULL);
}
/* Estimate probabilities of loopback edges in loops at same nest level. */
@@ -2099,11 +2110,11 @@ counts_to_freqs (void)
gcov_type count_max, true_count_max = 0;
basic_block bb;
- FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
+ FOR_ALL_BB (bb)
true_count_max = MAX (bb->count, true_count_max);
count_max = MAX (true_count_max, 1);
- FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
+ FOR_ALL_BB (bb)
bb->frequency = (bb->count * BB_FREQ_MAX + count_max / 2) / count_max;
return true_count_max;
@@ -2156,24 +2167,11 @@ void
estimate_bb_frequencies (void)
{
basic_block bb;
- sreal freq_max;
if (profile_status != PROFILE_READ || !counts_to_freqs ())
{
- static int real_values_initialized = 0;
+ mpfr_t freq_max, tmp;
- if (!real_values_initialized)
- {
- real_values_initialized = 1;
- sreal_init (&real_zero, 0, 0);
- sreal_init (&real_one, 1, 0);
- sreal_init (&real_br_prob_base, REG_BR_PROB_BASE, 0);
- sreal_init (&real_bb_freq_max, BB_FREQ_MAX, 0);
- sreal_init (&real_one_half, 1, -1);
- sreal_div (&real_inv_br_prob_base, &real_one, &real_br_prob_base);
- sreal_sub (&real_almost_one, &real_one, &real_inv_br_prob_base);
- }
-
mark_dfs_back_edges ();
single_succ_edge (ENTRY_BLOCK_PTR)->probability = REG_BR_PROB_BASE;
@@ -2181,17 +2179,21 @@ estimate_bb_frequencies (void)
/* Set up block info for each basic block. */
alloc_aux_for_blocks (sizeof (struct block_info_def));
alloc_aux_for_edges (sizeof (struct edge_info_def));
- FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
+ FOR_ALL_BB (bb)
{
edge e;
edge_iterator ei;
+ mpfr_init2 (BLOCK_INFO (bb)->frequency, PROB_PRECISION);
+
FOR_EACH_EDGE (e, ei, bb->succs)
{
- sreal_init (&EDGE_INFO (e)->back_edge_prob, e->probability, 0);
- sreal_mul (&EDGE_INFO (e)->back_edge_prob,
- &EDGE_INFO (e)->back_edge_prob,
- &real_inv_br_prob_base);
+ mpfr_init2 (EDGE_INFO (e)->back_edge_prob, PROB_PRECISION);
+ mpfr_set_si (EDGE_INFO (e)->back_edge_prob, e->probability,
+ GMP_RNDN);
+ mpfr_div_si (EDGE_INFO (e)->back_edge_prob,
+ EDGE_INFO (e)->back_edge_prob, REG_BR_PROB_BASE,
+ GMP_RNDN);
}
}
@@ -2199,24 +2201,37 @@ estimate_bb_frequencies (void)
to outermost to examine probabilities for back edges. */
estimate_loops ();
- memcpy (&freq_max, &real_zero, sizeof (real_zero));
+ mpfr_inits2 (PROB_PRECISION, freq_max, tmp, NULL);
+
+ mpfr_set_si (freq_max, 0, GMP_RNDN);
FOR_EACH_BB (bb)
- if (sreal_compare (&freq_max, &BLOCK_INFO (bb)->frequency) < 0)
- memcpy (&freq_max, &BLOCK_INFO (bb)->frequency, sizeof (freq_max));
+ if (mpfr_cmp (freq_max, BLOCK_INFO (bb)->frequency) < 0)
+ mpfr_set (freq_max, BLOCK_INFO (bb)->frequency, GMP_RNDN);
- sreal_div (&freq_max, &real_bb_freq_max, &freq_max);
- FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
+ mpfr_si_div (freq_max, BB_FREQ_MAX, freq_max, GMP_RNDN);
+ FOR_ALL_BB (bb)
{
- sreal tmp;
+ mpfr_mul (tmp, BLOCK_INFO (bb)->frequency, freq_max, GMP_RNDN);
+ bb->frequency = (int) mpfr_get_si (tmp, GMP_RNDU);
+ }
- sreal_mul (&tmp, &BLOCK_INFO (bb)->frequency, &freq_max);
- sreal_add (&tmp, &tmp, &real_one_half);
- bb->frequency = sreal_to_int (&tmp);
+ mpfr_clears (freq_max, tmp, NULL);
+
+ /* Clean up block info for each basic block. */
+ FOR_ALL_BB (bb)
+ {
+ edge e;
+ edge_iterator ei;
+
+ mpfr_clear (BLOCK_INFO (bb)->frequency);
+
+ FOR_EACH_EDGE (e, ei, bb->succs)
+ mpfr_clear (EDGE_INFO (e)->back_edge_prob);
}
-
free_aux_for_blocks ();
free_aux_for_edges ();
}
+
compute_function_frequency ();
}
Index: mcf.c
===================================================================
--- mcf.c (revision 170907)
+++ mcf.c (working copy)
@@ -55,16 +55,47 @@ along with GCC; see the file COPYING3. If not see
#include "profile.h"
+/* Floating point computations are involved in frequency and probability
+ manipulations, but GCC must not depend on the host floating point format.
+ Therefore MPFR is used for all floating point math. */
+#include <gmp.h>
+#include <mpfr.h>
+
+/* Using greater precision than the host's widest integer is a recepe for
+ random profile inconsistencies due to excess precision. */
+#undef PROB_PRECISION
+#define PROB_PRECISION HOST_BITS_PER_WIDE_INT
+
/* CAP_INFINITY: Constant to represent infinite capacity. */
#define CAP_INFINITY INTTYPE_MAXIMUM (HOST_WIDEST_INT)
/* COST FUNCTION. */
-#define K_POS(b) ((b))
-#define K_NEG(b) (50 * (b))
-#define COST(k, w) ((k) / mcf_ln ((w) + 2))
+#define K_POS_FACT 1
+#define K_NEG_FACT 50
+
+/* Cost is ((k) / mpfr_log ((w) + 2)). */
+static gcov_type
+fixup_cost (mpfr_t k, gcov_type w)
+{
+ gcov_type fcost;
+ mpfr_t tmp;
+
+ mpfr_init (tmp);
+ mpfr_set_si (tmp, (long int) w, GMP_RNDN);
+ mpfr_add_si (tmp, tmp, 2, GMP_RNDN);
+ mpfr_log (tmp, tmp, GMP_RNDN);
+ mpfr_div (tmp, k, tmp, GMP_RNDN);
+ fcost = mpfr_get_si (tmp, GMP_RNDN);
+ mpfr_clear (tmp);
+
+ return fcost;
+}
+
+
/* Limit the number of iterations for cancel_negative_cycles() to ensure
reasonable compile time. */
#define MAX_ITER(n, e) 10 + (1000000 / ((n) * (e)))
+
typedef enum
{
INVALID_EDGE,
@@ -318,52 +349,6 @@ dump_fixup_graph (FILE *file, fixup_graph_type *fi
}
-/* Utility routines. */
-/* ln() implementation: approximate calculation. Returns ln of X. */
-
-static double
-mcf_ln (double x)
-{
-#define E 2.71828
- int l = 1;
- double m = E;
-
- gcc_assert (x >= 0);
-
- while (m < x)
- {
- m *= E;
- l++;
- }
-
- return l;
-}
-
-
-/* sqrt() implementation: based on open source QUAKE3 code (magic sqrt
- implementation) by John Carmack. Returns sqrt of X. */
-
-static double
-mcf_sqrt (double x)
-{
-#define MAGIC_CONST1 0x1fbcf800
-#define MAGIC_CONST2 0x5f3759df
- union {
- int intPart;
- float floatPart;
- } convertor, convertor2;
-
- gcc_assert (x >= 0);
-
- convertor.floatPart = x;
- convertor2.floatPart = x;
- convertor.intPart = MAGIC_CONST1 + (convertor.intPart >> 1);
- convertor2.intPart = MAGIC_CONST2 - (convertor2.intPart >> 1);
-
- return 0.5f * (convertor.floatPart + (x * convertor2.floatPart));
-}
-
-
/* Common code shared between add_fixup_edge and add_rfixup_edge. Adds an edge
(SRC->DEST) to the edge_list maintained in FIXUP_GRAPH with cost of the edge
added set to COST. */
@@ -459,10 +444,12 @@ delete_fixup_graph (fixup_graph_type *fixup_graph)
static void
create_fixup_graph (fixup_graph_type *fixup_graph)
{
- double sqrt_avg_vertex_weight = 0;
- double total_vertex_weight = 0;
- double k_pos = 0;
- double k_neg = 0;
+ mpfr_t total_vertex_weight;
+ mpfr_t average_vertex_weight;
+ mpfr_t sqrt_avg_vertex_weight;
+ mpfr_t k_pos;
+ mpfr_t k_neg;
+
/* Vector to hold D(v) = sum_out_edges(v) - sum_in_edges(v). */
gcov_type *diff_out_in = NULL;
gcov_type supply_value = 1, demand_value = 0;
@@ -506,20 +493,31 @@ create_fixup_graph (fixup_graph_type *fixup_graph)
fixup_graph->edge_list =
(fixup_edge_p) xcalloc (fmax_num_edges, sizeof (fixup_edge_type));
+ /* Setup the remaining local variables. */
+ mpfr_init (total_vertex_weight);
+ mpfr_init (average_vertex_weight);
+ mpfr_init (sqrt_avg_vertex_weight);
+ mpfr_init (k_pos);
+ mpfr_init (k_neg);
+
diff_out_in =
(gcov_type *) xcalloc (1 + fnum_vertices_after_transform,
sizeof (gcov_type));
/* Compute constants b, k_pos, k_neg used in the cost function calculation.
b = sqrt(avg_vertex_weight(cfg)); k_pos = b; k_neg = 50b. */
+ mpfr_set_ui (total_vertex_weight, 0, GMP_RNDN);
FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
- total_vertex_weight += bb->count;
+ mpfr_add_si (total_vertex_weight,
+ total_vertex_weight, bb->count,
+ GMP_RNDN);
+ mpfr_div_ui (average_vertex_weight,
+ total_vertex_weight, n_basic_blocks,
+ GMP_RNDN);
+ mpfr_sqrt (sqrt_avg_vertex_weight, average_vertex_weight, GMP_RNDN);
+ mpfr_mul_ui (k_pos, sqrt_avg_vertex_weight, K_POS_FACT, GMP_RNDN);
+ mpfr_mul_ui (k_neg, sqrt_avg_vertex_weight, K_NEG_FACT, GMP_RNDN);
- sqrt_avg_vertex_weight = mcf_sqrt (total_vertex_weight / n_basic_blocks);
-
- k_pos = K_POS (sqrt_avg_vertex_weight);
- k_neg = K_NEG (sqrt_avg_vertex_weight);
-
/* 1. Vertex Transformation: Split each vertex v into two vertices v' and v'',
connected by an edge e from v' to v''. w(e) = w(v). */
@@ -530,7 +528,7 @@ create_fixup_graph (fixup_graph_type *fixup_graph)
{
/* v'->v'': index1->(index1+1). */
i = 2 * bb->index;
- fcost = (gcov_type) COST (k_pos, bb->count);
+ fcost = fixup_cost (k_pos, bb->count);
add_fixup_edge (fixup_graph, i, i + 1, VERTEX_SPLIT_EDGE, bb->count,
fcost, CAP_INFINITY);
fixup_graph->num_vertices++;
@@ -542,7 +540,7 @@ create_fixup_graph (fixup_graph_type *fixup_graph)
if (EDGE_INFO (e) && EDGE_INFO (e)->ignore)
continue;
j = 2 * e->dest->index;
- fcost = (gcov_type) COST (k_pos, e->count);
+ fcost = fixup_cost (k_pos, e->count);
add_fixup_edge (fixup_graph, i + 1, j, REDIRECT_EDGE, e->count, fcost,
CAP_INFINITY);
}
@@ -580,7 +578,7 @@ create_fixup_graph (fixup_graph_type *fixup_graph)
{
/* Skip adding reverse edges for edges with w(e) = 0, as its maximum
capacity is 0. */
- fcost = (gcov_type) COST (k_neg, pfedge->weight);
+ fcost = (gcov_type) fixup_cost (k_neg, pfedge->weight);
add_fixup_edge (fixup_graph, pfedge->dest, pfedge->src,
REVERSE_EDGE, 0, fcost, pfedge->weight);
}
@@ -704,6 +702,12 @@ create_fixup_graph (fixup_graph_type *fixup_graph)
/* Cleanup. */
free (diff_out_in);
+
+ mpfr_clear (total_vertex_weight);
+ mpfr_clear (average_vertex_weight);
+ mpfr_clear (sqrt_avg_vertex_weight);
+ mpfr_clear (k_pos);
+ mpfr_clear (k_neg);
}
More information about the Gcc-patches
mailing list