This is the mail archive of the crossgcc@sources.redhat.com mailing list for the crossgcc project.

See the CrossGCC FAQ for lots more information.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

More inline arm assembly woes... (long)


This source:

[lib_source]
/*
 * Multiply x by y as unsigned 32-bit values, shift the 64-bit product right
 * by n bits and return the low 32 bits of that, plus the last bit shifted
 * out of the low word (carried in by the final adc as a rounding increment).
 *
 *   x, y  factors
 *   n     right-shift count applied to the product
 *
 * The flags are listed as clobbered because movs rewrites them; without the
 * "cc" clobber the compiler is free to keep a live comparison result across
 * this statement once the function is inlined.
 *
 * NOTE(review): the sequence looks intended for 0 < n < 32. With n == 0 a
 * register-specified shift presumably leaves the carry flag untouched, so the
 * adc would add a stale carry -- confirm against the ARM ARM before relying
 * on n == 0.
 */
static inline UNS_32 fixp_mul_32u_nX( UNS_32 x, UNS_32 y, UNS_8 n ) {
	UNS_32 res, tmp;
	__asm__ __volatile__ (
		"umull	%0, %1, %3, %4			\n\t"	/* tmp = low word, res = high word of x*y */
		"movs	%0, %0, lsr %2			\n\t"	/* tmp >>= n; carry = last bit shifted out */
		"rsb	%2, %2, #32			\n\t"	/* n = 32 - n */
		"adc	%1, %0, %1, lsl %2		\n\t"	/* res = tmp + (res << n) + carry */
		: "=&r" (tmp), "=&r" (res), "+r" (n)
		: "r" (x), "r" (y)
		: "cc"	/* movs modifies the condition flags */
	);
	return res;
}
/*
 * Convenience form of fixp_mul_32u_nX() with the shift count fixed at 16:
 * forwards x and y unchanged and returns whatever that call returns.
 */
static inline UNS_32 fixp_mul_32u_n16( UNS_32 x, UNS_32 y ) {
	const UNS_8 shift = 16;

	return fixp_mul_32u_nX( x, y, shift );
}
[/lib_source]

[main_source]
	/* First product: operands taken from int_rand[0] and int_rand[1]. */
	int_a = int_rand[0];
	int_b = int_rand[1];
	result = fixp_mul_32u_n16( int_a, int_b );
	/* Second product, from int_rand[2] and int_rand[3], added to the first. */
	int_a = int_rand[2];
	int_b = int_rand[3];
	/*
	 * NOTE(review): result is not read anywhere after this statement in the
	 * excerpt shown. If that also holds for the full program, the compiler
	 * may drop the addition as dead code while still emitting both
	 * __volatile__ asm bodies -- confirm by consuming result (e.g. storing
	 * it to a volatile object).
	 */
	result = result + fixp_mul_32u_n16( int_a, int_b );

	/* Loop forever, flipping bit 3 (mask 0x08) of GPIOC->dr on every pass */
	while(1) { GPIOC->dr ^= 0x08; }
[/main_source]

compiles to this (using -O1):
	
[listing]
	int_a = int_rand[0];
    84ac:	e59f204c 	ldr	r2, [pc, #76]	; 8500 <main+0x344>
    84b0:	e8920003 	ldmia	r2, {r0, r1}
static inline UNS_32 fixp_mul_32u_nX( UNS_32 x, UNS_32 y, UNS_8 n ) {
    84b4:	e3a03010 	mov	r3, #16	; 0x10
	UNS_32 res, tmp;
	__asm__ __volatile__ (
    84b8:	e08ec190 	umull	ip, lr, r0, r1
    84bc:	e1b0c33c 	movs	ip, ip, lsr r3
    84c0:	e2633020 	rsb	r3, r3, #32	; 0x20
    84c4:	e0ace31e 	adc	lr, ip, lr, lsl r3
	int_b = int_rand[1];
	result = fixp_mul_32u_n16( int_a, int_b );
	int_a = int_rand[2];
    84c8:	e2820008 	add	r0, r2, #8	; 0x8
    84cc:	e8900003 	ldmia	r0, {r0, r1}
static inline UNS_32 fixp_mul_32u_nX( UNS_32 x, UNS_32 y, UNS_8 n ) {
    84d0:	e3a03010 	mov	r3, #16	; 0x10
	UNS_32 res, tmp;
	__asm__ __volatile__ (
    84d4:	e0828190 	umull	r8, r2, r0, r1
    84d8:	e1b08338 	movs	r8, r8, lsr r3
    84dc:	e2633020 	rsb	r3, r3, #32	; 0x20
    84e0:	e0a82312 	adc	r2, r8, r2, lsl r3
	int_b = int_rand[3];
	result = result + fixp_mul_32u_n16( int_a, int_b );

	/* Loop forever */
	while(1) { GPIOC->dr ^= 0x08; }
    84e4:	e3a0232f 	mov	r2, #-1140850688	; 0xbc000000
    84e8:	e1a026c2 	mov	r2, r2, asr #13
    84ec:	e5923000 	ldr	r3, [r2]
    84f0:	e2233008 	eor	r3, r3, #8	; 0x8
    84f4:	e5823000 	str	r3, [r2]
    84f8:	eafffffb 	b	84ec <main+0x330>
[/listing]

While I like the array access optimization, I'm appalled by the fact that gcc seems to have optimized the addition out of existence. Does anyone have any ideas here?

--
Bryce Schober
Design Engineer
Dynon Avionics, Inc.
www.dynonavionics.com

---
[This E-mail scanned for viruses by digiposs.com]


------ Want more information? See the CrossGCC FAQ, http://www.objsw.com/CrossGCC/ Want to unsubscribe? Send a note to crossgcc-unsubscribe@sources.redhat.com


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]