Re: Fastcode Unofficial D5 CharPos B&V



Hi

The fastest function for the Blended target that is still in the running is:

CharPos_Sha_IA32_1_a


A simple library unit containing it looks like this:

unit CharPosD5Unit;
// Small library unit whose interface exposes a single routine, CharPos.

interface

// CharPos returns the 1-based position of the first occurrence of ch in s,
// or 0 when ch does not occur (or s is empty/nil).
function CharPos(ch: char; const s: AnsiString): integer;

implementation

// CharPos: returns the 1-based index of the first byte of s that equals ch,
// or 0 when there is no such byte within the string's length (also 0 when the
// string pointer is nil).
//
// Registers as the code uses them on entry: al = ch, edx = pointer to the
// first character of s, with the length dword stored at [edx-4]. The result
// is left in eax. (Presumably this is Delphi's register calling convention --
// confirm for the compiler in use.)
//
// Method: ch is replicated into all four bytes of a dword ("pattern"). Each
// dword of s is xor-ed with the pattern, so bytes equal to ch become 0, and
// the classic zero-byte test  (v - $01010101) and (not v) and $80808080  is
// applied. The lowest set $80 bit of the result marks the first matching byte.
// ecx runs as a negative offset from edx = s + (length rounded up to 4), so
// the loop ends when ecx reaches or passes 0.
//
// NOTE(review): because whole dwords are read, the loads can touch bytes past
// s[length]: up to the rounded-up end, and the load at [ecx+edx+4] in @@find
// can read one further dword beyond that. Matches found in those bytes are
// rejected by the final length compare, but the reads themselves assume that
// memory is readable -- confirm against the memory manager in use.
function CharPos(ch: char; const s: AnsiString): integer;
asm
test edx,edx                       // nil string pointer?
jz @@ret0                          // yes: return 0, nothing pushed yet

// Save the registers used as scratch below. edx (the original string pointer)
// is saved too, because edx is re-based and the original is needed at the end.
push ebp
push ebx
push edx
push esi
push edi
mov ecx,[edx-4]                    // ecx = length dword stored just before the characters

xor ecx,-1                         // ecx = not length = -length-1
jz @@pop0                          // stored length was -1: return 0

mov ah,al; add ecx,1               // ax = ch,ch            | ecx = -length
movzx edi,ax; and ecx,-4           // edi = $0000chch       | ecx = -(length rounded up to a multiple of 4)
shl eax,16; sub edx,ecx            // eax = $chch0000       | edx = s + rounded length (ecx is a negative offset from it)
or edi,eax; mov ebp,$80808080      // edi = ch in all 4 bytes | ebp = per-byte high-bit mask

// First dword is handled outside the loop.
mov eax,edi                        // eax keeps the pattern for the rest of the routine
xor edi,[ecx+edx]                  // v = first dword of s xor pattern (matching bytes -> 0)
mov esi,eax                        // esi = pattern, to be xor-ed with the second dword below
lea ebx,[edi-$01010101]            // zero-byte test: v - $01010101
xor edi,-1                         //                 not v
and ebx,edi                        //                 (v - $01010101) and not v
add ecx,4                          // step past the first dword; sets flags for the jge
jge @@last1                        // no further dwords: finish the test at @@last1
and ebx,ebp                        // keep only the per-byte high bits
jnz @@found4;                      // match in the first dword
xor esi,[ecx+edx]                  // esi = second dword xor pattern, ready for the loop
mov ebp,ebp //nop                  (register moved onto itself; presumably padding to align @@find -- confirm)
// Main loop: two dwords per iteration. On entry esi already holds
// (dword at offset ecx) xor pattern.
@@find:
lea ebx,[esi-$01010101]            // zero-byte test on the esi dword
xor esi,-1
and ebx,esi
mov edi,[ecx+edx+4]                // load the following dword (may lie past the rounded end, see header note)
add ecx,8                          // advance two dwords; sets flags for the jge
jge @@last2                        // reached/passed the end: finish at @@last2
xor edi,eax                        // edi = following dword xor pattern
and ebx,ebp                        // finish test of the esi dword; flags consumed by jnz below
mov esi,[ecx+edx]                  // pre-load the next iteration's dword (mov leaves the flags untouched)
jnz @@found0                       // match in the esi dword
lea ebx,[edi-$01010101]            // zero-byte test on the edi dword
xor edi,-1
and ebx,edi
xor esi,eax                        // esi = pre-loaded dword xor pattern
and ebx,ebp
jz @@find;                         // no match in either dword: next iteration
// Falls through here when the match is in the edi dword.
@@found4:
add ecx,4                          // match was in the later dword: bias the offset by 4
// ebx has a $80 bit set in the matching byte; only the lowest set one is used.
// Each shr moves the next byte's high bit into CF.
@@found0:
shr ebx,8; jc @@inc0               // bit 7 set: match in byte 0
shr ebx,8; jc @@inc1               // bit 15 set: match in byte 1
shr ebx,8; jc @@inc2               // bit 23 set: match in byte 2
@@inc3: inc ecx                    // otherwise byte 3; falling through the chain
@@inc2: inc ecx                    // adds 3, 2, 1 or 0 to ecx
@@inc1: inc ecx
@@inc0:
pop edi;
pop esi; lea eax,[ecx+edx-7]       // eax = address of the matching byte + 1
pop edx;                           // edx = original string pointer again
pop ebx; sub eax,edx               // eax = 1-based index of the match
pop ebp; cmp eax,[edx-4]           // compare index with the string length
jg @@ret0                          // match lies past the end of the string: return 0
ret                                // all saved registers are already restored
// End of data reached inside the loop: ebx holds the untested result for the
// esi dword, edi the raw following dword.
@@last2:
and ebx,ebp
jnz @@found0                       // match in the esi dword
xor edi,eax                        // test the following dword as well; if it lies past
lea ebx,[edi-$01010101]            // the end, the length compare above rejects any match
xor edi,-1
and ebx,edi
@@last1:
and ebx,ebp                        // finish the test of the last dword
jnz @@found4;
// Not found: restore the saved registers and return 0.
@@pop0:
pop edi
pop esi
pop edx
pop ebx
pop ebp
@@ret0:
xor eax,eax                        // result = 0
//ret
// (no explicit ret on this path; presumably the compiler emits the return at
// the end of the asm routine -- confirm)
end;

end.

Best regards
Dennis Kjaer Christensen


.



Relevant Pages