prefetch seem don't work ?!



Hi all

I'm trying to use prefetch instrunction on AMD64 (939 3200+), without
result.
I have compile program with gcc in two different ways :
- with prefetch instruction and without but the time is same in both
test and in other test.

how can improve this program ?

thx in advance

claudio


Home$ time ./pr2

real 0m0.002s
user 0m0.000s
sys 0m0.004s
Home$

..........................................source code in c

#define LARGE_NUM 512000
#define ARR_SIZE (LARGE_NUM*8)

double a[LARGE_NUM] ;
double b[LARGE_NUM] ;
double c[LARGE_NUM] ;

int main ( void )
{
int i ;

for (i=0; i<LARGE_NUM; i++)
a[i] = b[i] + c[i] ;


return 0 ;
}


.................................source code in asm



.text
.p2align 4,,15
..globl main
.type main, @function
main:

xorl %eax, %eax
.p2align 4,,7


#*******************************
#*******************************
#*******************************

movq $64 , %rdx
movq $a , %rax
movq $b , %rbx
movq $c , %rcx


CICLO:
# prefetchw 512(%rax)
# prefetch 512(%rbx)
# prefetch 512(%rcx)

#1
movlpd (%rax) , %xmm0
addsd (%rbx) , %xmm0
movsd %xmm0 , (%rcx)
addq $8,%rax
addq $8,%rbx
addq $8,%rcx
#2
movlpd (%rax) , %xmm0
addsd (%rbx) , %xmm0
movsd %xmm0 , (%rcx)
addq $8,%rax
addq $8,%rbx
addq $8,%rcx
#3
movlpd (%rax) , %xmm0
addsd (%rbx) , %xmm0
movsd %xmm0 , (%rcx)
addq $8,%rax
addq $8,%rbx
addq $8,%rcx
#4
movlpd (%rax) , %xmm0
addsd (%rbx) , %xmm0
movsd %xmm0 , (%rcx)
addq $8,%rax
addq $8,%rbx
addq $8,%rcx
#5
movlpd (%rax) , %xmm0
addsd (%rbx) , %xmm0
movsd %xmm0 , (%rcx)
addq $8,%rax
addq $8,%rbx
addq $8,%rcx
#6
movlpd (%rax) , %xmm0
addsd (%rbx) , %xmm0
movsd %xmm0 , (%rcx)
addq $8,%rax
addq $8,%rbx
addq $8,%rcx
#7
movlpd (%rax) , %xmm0
addsd (%rbx) , %xmm0
movsd %xmm0 , (%rcx)
addq $8,%rax
addq $8,%rbx
addq $8,%rcx
#8
movlpd (%rax) , %xmm0
addsd (%rbx) , %xmm0
movsd %xmm0 , (%rcx)
addq $8,%rax
addq $8,%rbx
addq $8,%rcx

decq %rdx
jnz CICLO


#*******************************
#*******************************
#*******************************


xorl %eax, %eax
ret

.comm c,4096000,32
.comm b,4096000,32
.comm a,4096000,32

.