This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: Gcc builtin review: isinf, isnan ...


I raised this issue before but didn't write a patch, so I should do it now.
I won't discuss glibc separately, as it shares the same flaw as gcc.

The main problem is that these functions try to be branchless, which causes
a performance regression for most applications compared with branched code.

The problem is that a correctly predicted branch is free, while a conditional
store always costs a cycle. So you need an unpredictable branch to get a
performance gain. When the branch is predicted correctly 95% of the time,
branchless code doesn't pay for itself if it adds one cycle versus the
branched code and a misprediction costs 20 cycles (0.05 * 20 = 1 cycle on
average, which merely breaks even).

And NaN is quite an exceptional value, so the branches will almost always be
predicted correctly. Otherwise the user has other problems: if 5% of his
data are NaNs, the result will likely be garbage anyway.

Then there is the problem that with a modern gcc you are unlikely to save a
branch anyway. Most of these functions are called inside an if. Starting with
gcc-4.9, the compiler optimizes that branch away because it is predicated,
which results in simpler code.

More evidence for this: I took the assembly of the benchmark below and
changed the conditional move to a jump, which wins back about 10% in performance.

To show that, I wrote a simple example of a branched isinf that is around
10% faster than the builtin.

#ifdef BRANCHED
/* Branched replacement for the isinf builtin.

   Returns 1 if DX is +Inf, -1 if DX is -Inf, and 0 for every other value
   (finite numbers and NaNs).

   The bit pattern is inspected in an unsigned type: the original doubled a
   signed long, which overflows (undefined behaviour) for every value with
   a large exponent, and long is only 32 bits wide on some ABIs.
   Assumes double uses the IEEE 754 binary64 layout.  */
static inline int
isinf (double dx)
{
  union
  {
    double d;
    unsigned long long bits;
  } pun;

  pun.d = dx;
  unsigned long long bits = pun.bits;

  /* Drop the sign bit; what remains equals the exponent-all-ones,
     mantissa-zero pattern only for the two infinities.  */
  if ((bits & 0x7fffffffffffffffULL) != 0x7ff0000000000000ULL)
    return 0;

  /* The sign bit alone now distinguishes +Inf from -Inf.  */
  return bits == 0x7ff0000000000000ULL ? 1 : -1;
}
#endif
#include <stdlib.h>	/* malloc, free */

/* Benchmark driver: scans the first N_ELEMS doubles of a heap buffer
   N_REPS times and adds 42 to the result for every value that isinf
   reports as infinite.

   Returns the accumulated sum as the exit status, or 1 if the buffer
   cannot be allocated.  The buffer is filled with finite values before
   the timed loop so the program no longer reads indeterminate memory,
   and the accumulator starts at zero instead of being uninitialised.  */
int main (void)
{
  /* BUF_BYTES keeps the original allocation size; only the first
     N_ELEMS doubles are ever scanned.  */
  enum { BUF_BYTES = 800000, N_ELEMS = 1000, N_REPS = 1000000 };
  int ret = 0;
  int i, j;
  double *d = malloc (BUF_BYTES);

  if (d == NULL)
    return 1;

  /* Deterministic, finite input: the isinf branch is never taken, which
     is the well-predicted case this benchmark is meant to measure.  */
  for (i = 0; i < N_ELEMS; i++)
    d[i] = (double) i;

  for (j = 0; j < N_REPS; j++)
    for (i = 0; i < N_ELEMS; i++)
      if (__builtin_expect (isinf (d[i]), 0))
        ret += 42;

  free (d);
  return ret;
}


	.file	"inf.c"
/*
 * Syntax: GNU as, AT&T operand order.  Target: x86-64, System V AMD64 ABI.
 *
 * int main (void)
 *
 * Branched variant of the isinf benchmark: scans the first 1000 doubles
 * of a zero-filled heap buffer 1000000 times and adds 42 to the result
 * for every element whose absolute value is greater than the constant
 * at .LC1 (0x7fefffffffffffff, the largest finite double), i.e. for
 * every infinity.  NaNs compare unordered and are not counted.
 *
 * Out:   eax = accumulated sum, or 1 if the allocation failed
 * Regs:  rax = buffer base (kept until the final free)
 *        rdx = cursor, rsi = end of the scanned range
 *        edi = remaining outer iterations
 *        ebx = running sum (callee-saved, pushed and popped here)
 *        xmm2 = sign-clearing mask (.LC0), xmm1 = threshold (.LC1)
 * Stack: one push; rsp is 16-byte aligned at both calls.
 */
	.equ	BUF_BYTES, 800000		# allocation size, as in the C source
	.equ	SCAN_BYTES, 8000		# 1000 doubles * 8 bytes
	.equ	N_REPS, 1000000			# outer loop trip count
	.equ	HIT_VALUE, 42			# added for each infinite element
	.section	.text.unlikely,"ax",@progbits
.LCOLDB2:
	.section	.text.startup,"ax",@progbits
.LHOTB2:
	.p2align 4,,15
	.globl	main
	.type	main, @function
main:
.LFB0:
	.cfi_startproc
	pushq	%rbx				# save callee-saved rbx; realigns rsp to 16
	.cfi_def_cfa_offset 16
	.cfi_offset 3, -16
	movl	$BUF_BYTES, %edi		# calloc (BUF_BYTES, 1): zeroed buffer,
	movl	$1, %esi			# so the scan reads defined data
	call	calloc@PLT
	movl	$1, %ebx			# provisional result: allocation failed
	testq	%rax, %rax
	je	.L5
	xorl	%ebx, %ebx			# sum = 0 (was left uninitialised)
	movsd	.LC0(%rip), %xmm2
	leaq	SCAN_BYTES(%rax), %rsi
	movsd	.LC1(%rip), %xmm1
	movl	$N_REPS, %edi
	.p2align 4,,10
	.p2align 3
.L2:
	movq	%rax, %rdx			# restart the scan at the buffer base
	.p2align 4,,10
	.p2align 3
.L3:
	movsd	(%rdx), %xmm0
	andpd	%xmm2, %xmm0			# xmm0 = fabs (*cursor)
	ucomisd	%xmm1, %xmm0
	jbe	.L4				# not above threshold, or NaN: skip the add
	addl	$HIT_VALUE, %ebx		# infinite element found
.L4:
	addq	$8, %rdx
	cmpq	%rsi, %rdx
	jne	.L3
	subl	$1, %edi
	jne	.L2
	movq	%rax, %rdi			# release the buffer
	call	free@PLT
.L5:
	movl	%ebx, %eax
	popq	%rbx
	.cfi_def_cfa_offset 8
	ret

	.cfi_endproc
.LFE0:
	.size	main, .-main
	.section	.text.unlikely
.LCOLDE2:
	.section	.text.startup
.LHOTE2:
	.section	.rodata.cst16,"aM",@progbits,16
	.align 16
/* 16-byte constant: low quadword has every bit set except bit 63 (the
   sign bit of a double); high quadword is zero.  */
.LC0:
	.quad	0x7fffffffffffffff
	.quad	0x0000000000000000
	.section	.rodata.cst8,"aM",@progbits,8
	.align 8
/* Bit pattern of the largest finite double (DBL_MAX).  */
.LC1:
	.quad	0x7fefffffffffffff
	.ident	"GCC: (Debian 4.9.2-9) 4.9.2"
	.section	.note.GNU-stack,"",@progbits


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]