Re: a common belief or a wrong C[++] compiler?

From: beta (beta_at_s.l)
Date: 08/20/04


Date: Fri, 20 Aug 2004 08:10:31 GMT

On Thu, 19 Aug 2004 21:31:02 +0200, Herbert Kleebauer wrote:
>beta wrote:
>> On Wed, 18 Aug 2004 22:36:19 +0200, Herbert Kleebauer <klee@unibwm.de>
>> wrote:
>> >source in both cases). The result was a little bit surprising:
>> yes
>>
>> >16 bit com program: 24 seconds
>> >32 bit Windows prog: 28 seconds
>> >gcc -O3 : 14 seconds
>
>> on a Pentium 1 at 100 MHz, for my asm translation only
>> asm 32 bits: 106 seconds
>> c 32 bits: 112 seconds (borland compiler)
>
>Can you post your code or execute the programs in
>
>http://137.193.64.130/speed.zip
>
>on your PC so we can compare the results.
The code below is *similar* to the asm routine I use; it may
contain errors.
nasm
; Data segment declaration; nothing is placed in it in this listing.
; The PUBLIC / ALIGN= / CLASS= / USE32 qualifiers are NASM segment
; attributes (obj-style output) -- presumably chosen to match the C
; compiler's own segment names; confirm against the build setup.
section _DATA public align=4 class=DATA use32

; 32-bit code segment that holds the routines below.
section _TEXT public align=1 class=CODE use32

; Export the entry point. The leading underscore presumably matches the
; C compiler's name decoration -- confirm against the caller.
global _asm_crp

;-----------------------------------------------------------------------
; mu -- multiply two 16-bit values modulo 65537 (2^16 + 1), where the
;       value 0 stands for 65536.
;       (Presumably the multiplication primitive of the IDEA cipher --
;       confirm against the C source this was translated from.)
;
; In:    eax = x, edx = y; both must already be reduced to 16 bits
;        (original note: "parameters in a=x r=y", i.e. a=eax, r=edx;
;        the note also mentions "z2=ax, z3=bx", meaning unclear here)
; Out:   eax = product, 0..0ffffh
; Clobb: edx (only when both operands are non-zero), flags
;-----------------------------------------------------------------------
mu:
        test    eax, eax
        jz      .zero_operand
        test    edx, edx
        jnz     .both_nonzero

.zero_operand:
        ; At least one operand is 0 (meaning 65536 == -1 mod 65537), so
        ; the product is the negated other operand: 1 - (x + y) mod 2^16.
        add     eax, edx
        neg     eax
        inc     eax
        jmp     short .reduce

.both_nonzero:
        mul     edx                     ; product fits in eax (16 x 16 bits)
        mov     edx, eax
        shr     edx, 16                 ; edx = high half
        and     eax, 0ffffh             ; eax = low half
        sub     eax, edx                ; 2^16 == -1, so result = low - high
        jae     .reduce
        inc     eax                     ; borrow: add 65537 (== +1 mod 2^16)

.reduce:
        and     eax, 0ffffh
        ret

;-----------------------------------------------------------------------
; _asm_crp -- transform one 8-byte block in place with a table of
;             16-bit subkeys: eight identical rounds followed by a
;             final four-subkey step.
;             (The round structure looks like the IDEA block cipher --
;             presumably that is what this implements; confirm against
;             the C source.)
;
; Arguments are taken from the stack (presumably a cdecl prototype such
; as  void asm_crp(uint32 *p, uint32 *q)  -- confirm with the caller).
; Offsets below are valid after the prologue, which pushes six
; registers and reserves 16 bytes of locals:
;
;   [esp+16] ebp   [esp+20] edi   [esp+24] esi
;   [esp+28] edx   [esp+32] ecx   [esp+36] ebx
;   [esp+40] return address
;   [esp+44] p -> two dwords of data; read on entry, overwritten on exit
;   [esp+48] q -> subkeys, one per dword; 8*6 + 4 = 52 entries are read.
;                 Entries passed to mu must have a zero upper half.
;
; The block is handled as four 16-bit words:
;   w1 = low  half of p[0]  -> [esp+12]
;   w2 = high half of p[0]  -> edi
;   w3 = low  half of p[1]  -> ebp
;   w4 = high half of p[1]  -> [esp+8]
; Other working storage:
;   [esp+4] = w2 + subkey (per-round temporary)
;   ebx     = first, then combined, mixing term
;   ecx     = running subkey pointer
;   esi     = round counter; reused for the second output word at the end
;
; Out:   p[0], p[1] rewritten. eax is not preserved and carries no
;        meaningful return value; every other register is restored.
;-----------------------------------------------------------------------
_asm_crp:
        push    ebx
        push    ecx
        push    edx
        push    esi
        push    edi
        push    ebp
        sub     esp, 16

        ; Split the input block into its four 16-bit words.
        mov     ecx, [esp + 44]         ; ecx = p
        mov     eax, [ecx]
        mov     edx, [ecx + 4]
        mov     edi, eax
        mov     ebp, edx
        shr     edi, 16                 ; edi = w2
        shr     edx, 16                 ; edx = w4
        and     ebp, 0ffffh             ; ebp = w3
        and     eax, 0ffffh             ; eax = w1
        mov     [esp + 12], eax
        mov     [esp + 8], edx

        mov     ecx, [esp + 48]         ; ecx = q (first subkey)
        xor     esi, esi                ; round counter = 0
        jmp     .as_loop_in             ; target is > 127 bytes ahead, so
                                        ; "jmp short" cannot reach it

.as_loop:
        ; --- one round, consuming six subkeys ---
        mov     eax, [esp + 12]
        mov     edx, [ecx]
        call    mu                      ; w1 = mu(w1, k0)
        add     ecx, 4
        mov     [esp + 12], eax

        mov     eax, [ecx]
        add     eax, edi
        add     ecx, 4
        and     eax, 0ffffh
        mov     [esp + 4], eax          ; w2 = (w2 + k1) mod 2^16

        add     ebp, [ecx]
        and     ebp, 0ffffh             ; w3 = (w3 + k2) mod 2^16
        add     ecx, 4

        mov     eax, [esp + 8]
        mov     edx, [ecx]
        call    mu                      ; w4 = mu(w4, k3)
        add     ecx, 4
        mov     [esp + 8], eax

        mov     eax, [esp + 12]
        xor     eax, ebp
        mov     edx, [ecx]
        call    mu                      ; t0 = mu(w1 ^ w3, k4)
        add     ecx, 4
        mov     ebx, eax

        mov     edx, [esp + 4]
        xor     edx, [esp + 8]
        add     eax, edx
        and     eax, 0ffffh
        mov     edx, [ecx]
        call    mu                      ; t1 = mu(((w2 ^ w4) + t0) mod 2^16, k5)
        add     ecx, 4

        add     ebx, eax
        and     ebx, 0ffffh             ; t2 = (t0 + t1) mod 2^16

        xor     [esp + 12], eax         ; w1 ^= t1
        mov     edi, ebp
        xor     edi, eax                ; next w2 = w3 ^ t1  (middle words swap)
        mov     ebp, [esp + 4]
        xor     ebp, ebx                ; next w3 = w2 ^ t2
        inc     esi
        xor     [esp + 8], ebx          ; w4 ^= t2

.as_loop_in:
        cmp     esi, 8
        ; The loop body is longer than 127 bytes, so this backward branch
        ; cannot use the short encoding; request the near form explicitly.
        jb      near .as_loop

        ; --- final step, consuming four subkeys; the middle words are
        ;     taken crosswise (ebp feeds the second output word, edi the
        ;     third) ---
        mov     eax, [esp + 12]
        mov     edx, [ecx]
        call    mu                      ; y1 = mu(w1, k0)
        add     ecx, 4
        mov     [esp + 12], eax

        mov     esi, [ecx]
        add     esi, ebp
        add     ecx, 4
        and     esi, 0ffffh             ; y2 = (w3 + k1) mod 2^16
                                        ; (the original masked edx here by
                                        ; mistake; the stored result was
                                        ; saved only by the shl below)

        mov     ebp, [ecx]
        add     ebp, edi
        add     ecx, 4
        and     ebp, 0ffffh             ; y3 = (w2 + k2) mod 2^16

        mov     eax, [esp + 8]
        mov     edx, [ecx]
        call    mu                      ; y4 = mu(w4, k3)

        ; Repack as p[0] = y2:y1, p[1] = y4:y3 and store.
        mov     ecx, [esp + 44]         ; ecx = p
        mov     ebx, [esp + 12]         ; ebx = y1
        shl     eax, 16
        shl     esi, 16
        or      ebp, eax
        or      esi, ebx
        mov     [ecx], esi
        mov     [ecx + 4], ebp

        add     esp, 16
        pop     ebp
        pop     edi
        pop     esi
        pop     edx
        pop     ecx
        pop     ebx
        ret



Relevant Pages