Re: improve strlen
- From: "hutch--" <spamtrap@xxxxxxxxxx>
- Date: Tue, 25 Oct 2005 17:57:21 +0000 (UTC)
jukka,
Here is a slightly tweaked version of the algo I posted. It unrolls a
block of code by 8 and replaces an immediate in the loop code with the
same value in a spare register. On my test PIV it clocks in at about
22% faster than the last version I posted.
I have done all of the testing on strings that are misaligned so that
the alignment code is forced to run.
;
«««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««
; ---------------------------------------------------------------------
; fn_00401460 - length of a zero-terminated byte string
;
; Syntax : Intel operand order (MASM style), 32-bit x86
; In     : [esp+4] on entry = address of the string (one dword argument)
; Out    : eax = number of bytes before the terminating zero byte
; Stack  : the routine removes its own argument (ret 4)
; Saved  : esi, edi, ebx, ebp (pushed on entry, popped on exit)
; Clobb  : ecx, edx, flags
;
; Register roles once set up:
;   ebx = 80808080h   bit-7-of-every-byte mask used by the zero test
;   ebp = 4           dword step (kept in a register instead of an immediate)
;   esi = number of bytes between the string start and the next
;         4-byte boundary (0..3)
;   edx = first aligned address + 3 (bias used by the final sbb)
;   eax = read pointer; in the dword loop it is already advanced past
;         the dword that is being tested
;
; Method: bytes are tested one at a time until eax is 4-byte aligned,
; then whole dwords are tested with
;       ((x - 01010101h) AND (NOT x)) AND 80808080h
; which is non-zero exactly when x contains a zero byte. The lowest set
; flag byte marks the first zero byte; flags above it can be spurious,
; so the tail code inspects the low bytes first.
;
; The registers are saved with push/pop. Storing them below esp without
; moving esp leaves them in memory that anything else using this stack
; is free to overwrite.
; ---------------------------------------------------------------------
fn_00401460:
        push    esi
        push    edi
        push    ebx
        push    ebp

        mov     ebx, 80808080h
        mov     ebp, 4
        mov     eax, [esp+14h]          ; argument: 4 pushes (10h) + return address (4)

        ; ecx = esi = distance from the string start up to the next dword boundary
        mov     ecx, eax
        add     ecx, 3
        and     ecx, 0FFFFFFFCh
        sub     ecx, eax
        mov     esi, ecx                ; mov leaves the flags from sub intact
        jz      lbl2                    ; already aligned: go straight to the dword loop

        ; Byte-wise lead-in. The loop runs esi+1 times (jns keeps going while
        ; ecx is still >= 0 after the decrement), so it also tests the first
        ; aligned byte and leaves eax on the aligned address.
        sub     eax, 1
lbl0:
        add     eax, 1
        cmp     BYTE PTR [eax], 0
        jz      lbl1
        sub     ecx, 1
        jns     lbl0
        jmp     lbl2

lbl1:
        ; terminator found during the lead-in: length = pointer - string start
        sub     eax, [esp+14h]
        jmp     lbl6

lbl2:
        lea     edx, [eax+3]            ; bias consumed by "sbb eax, edx" below
        mov     edi, edi                ; two-byte no-op kept from the original (presumably
                                        ; loop-entry padding - re-check alignment if it matters)

        ; Dword loop, unrolled 8 times. Each step loads a dword, advances eax
        ; past it and leaves the zero-byte flags in ecx.
lbl3:
        mov     edi, [eax]
        add     eax, ebp
        lea     ecx, [edi-1010101h]
        not     edi
        and     ecx, edi
        and     ecx, ebx
        jnz     lbl4

        mov     edi, [eax]
        add     eax, ebp
        lea     ecx, [edi-1010101h]
        not     edi
        and     ecx, edi
        and     ecx, ebx
        jnz     lbl4

        mov     edi, [eax]
        add     eax, ebp
        lea     ecx, [edi-1010101h]
        not     edi
        and     ecx, edi
        and     ecx, ebx
        jnz     lbl4

        mov     edi, [eax]
        add     eax, ebp
        lea     ecx, [edi-1010101h]
        not     edi
        and     ecx, edi
        and     ecx, ebx
        jnz     lbl4

        mov     edi, [eax]
        add     eax, ebp
        lea     ecx, [edi-1010101h]
        not     edi
        and     ecx, edi
        and     ecx, ebx
        jnz     lbl4

        mov     edi, [eax]
        add     eax, ebp
        lea     ecx, [edi-1010101h]
        not     edi
        and     ecx, edi
        and     ecx, ebx
        jnz     lbl4

        mov     edi, [eax]
        add     eax, ebp
        lea     ecx, [edi-1010101h]
        not     edi
        and     ecx, edi
        and     ecx, ebx
        jnz     lbl4

        mov     edi, [eax]
        add     eax, ebp
        lea     ecx, [edi-1010101h]
        not     edi
        and     ecx, edi
        and     ecx, ebx
        jz      lbl3

        ; ecx holds 80h in the byte lane of the first zero byte; eax points
        ; 4 bytes past the start of the dword that contained it.
lbl4:
        test    ecx, 8080h              ; zero byte in the low half (byte 0 or 1)?
        jnz     lbl5
        shr     ecx, 10h                ; no: bring the flags for bytes 2/3 down
        add     eax, 2                  ; and account for the two bytes skipped
lbl5:
        shl     cl, 1                   ; CF = 1 when the zero is the lower byte of the pair
        sbb     eax, edx                ; eax = offset of the zero byte from the first aligned address
        add     eax, esi                ; plus the unaligned lead-in bytes = string length

lbl6:
        pop     ebp
        pop     ebx
        pop     edi
        pop     esi
        ret     4
;
«««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««
Regards,
hutch at movsd dot com
.
- Follow-Ups:
- Re: improve strlen
- From: jukka@xxxxxxxxxxxx
- Re: improve strlen
- References:
- improve strlen
- From: Claudio Daffra
- Re: improve strlen
- From: spamtrap
- Re: improve strlen
- From: hutch--
- Re: improve strlen
- From: spamtrap
- Re: improve strlen
- From: jukka@xxxxxxxxxxxx
- Re: improve strlen
- From: jukka@xxxxxxxxxxxx
- Re: improve strlen
- From: jukka@xxxxxxxxxxxx
- improve strlen
- Prev by Date: Re: compiler generated output
- Next by Date: Re: compiler generated output
- Previous by thread: Re: improve strlen
- Next by thread: Re: improve strlen
- Index(es):
Relevant Pages
|