Re: fastcode MMX tutorials?



Hi

A good starting point would be to download the CompareMem B&V (benchmark and validation tool)

http://dennishomepage.gugs-cats.dk/CompareMemChallenge.htm

and look at this function

// Compares Length bytes at P1 with Length bytes at P2.
// Returns True when every byte matches, or when Length <= 0; False otherwise.
//
// 32-bit inline assembly. The code reads its arguments from EAX (P1),
// EDX (P2) and ECX (Length) and leaves the result in EAX (1 = True, 0 = False).
// EBX, ESI and EDI are saved on entry and restored on every exit path.
//
// Strategy:
//   Length < 32  : plain byte-by-byte loop.
//   Length >= 32 : compare the first 8 bytes with two dword compares, advance
//                  P1 to the next 8-byte boundary, compare 16 bytes per
//                  iteration with MMX, then finish the tail byte-by-byte.
//
// All pointer comparisons use unsigned condition codes (jnbe/jbe/jae):
// addresses are unsigned, and a signed test gives the wrong answer for a
// buffer that crosses the $80000000 boundary.
function CompareMem_DKC_MMX_1_a(P1, P2: Pointer; Length: Integer): Boolean;
asm
  push  ebx
  push  esi
  push  edi
  test  ecx, ecx
  jle   @ResultTrueExit          // nothing to compare -> True
  mov   edi, eax                 // edi = P1 cursor
  mov   esi, edx                 // esi = P2 cursor
  lea   eax, [ecx+edi]           // eax = P1 + Length (end of P1)
  cmp   ecx, 32
  jnl   @IfEnd2                  // Length >= 32 -> MMX path

  // ---- Short buffers: byte-by-byte, no MMX used ----
@Loop1Start:
  movzx edx, byte ptr [edi]
  cmp   dl, [esi]
  jnz   @ResultFalseExit
  add   edi, 1
  add   esi, 1
  cmp   eax, edi
  jnbe  @Loop1Start              // while end > cursor (unsigned)
  xor   eax, eax
  add   eax, 1                   // Result := True
  pop   edi
  pop   esi
  pop   ebx
  ret

  // ---- Long buffers ----
@IfEnd2:
  mov   edx, edi
  and   edx, 7
  mov   eax, 8
  sub   eax, edx                 // eax = 8 - (P1 and 7): 1..8 bytes up to the next 8-byte boundary
  mov   ebx, edi                 // ebx = original P1, needed to compute the end pointer
  // Compare the first 8 bytes with two dword compares; this covers every
  // byte that the alignment step below skips over.
  mov   edx, [edi]
  cmp   edx, [esi]
  jnz   @ResultFalseExit
  mov   edx, [edi+4]
  cmp   edx, [esi+4]
  jnz   @ResultFalseExit
  add   edi, eax                 // P1 cursor is now 8-byte aligned
  add   esi, eax                 // P2 cursor advances by the same amount
  add   ecx, ebx                 // ecx = P1 + Length
  sub   ecx, 16                  // ecx = last cursor position with 16 bytes still available

  // 16 bytes per iteration. Length >= 32 and at most 8 bytes were skipped,
  // so at least 24 bytes remain when the loop is first entered.
@Loop3Start:
  movq  mm0, [edi]
  movq  mm1, [esi]
  pcmpeqb  mm1, mm0              // each byte of mm1 = $FF where equal, $00 where different
  packsswb mm1, mm1              // squeeze the 8 byte flags into the low dword
  movd  ebx, mm1
  cmp   ebx, $FFFFFFFF           // all ones <=> all 8 bytes were equal
  jnz   @ResultFalseExit
  add   edi, 8
  add   esi, 8
  movq  mm0, [edi]
  movq  mm1, [esi]
  pcmpeqb  mm1, mm0
  packsswb mm1, mm1
  movd  ebx, mm1
  cmp   ebx, $FFFFFFFF
  jnz   @ResultFalseExit
  add   edi, 8
  add   esi, 8
  cmp   edi, ecx
  jbe   @Loop3Start              // unsigned: continue while cursor <= end - 16

  add   ecx, 16                  // ecx = P1 + Length again
  cmp   edi, ecx
  jae   @ResultTrueExit          // unsigned: no tail bytes left

  // ---- Tail: fewer than 16 bytes remain ----
@Loop4Start:
  movzx edx, byte ptr [edi]
  cmp   dl, [esi]
  jnz   @ResultFalseExit
  add   edi, 1
  add   esi, 1
  cmp   ecx, edi
  jnbe  @Loop4Start              // while end > cursor (unsigned)

@ResultTrueExit:
  emms                           // leave MMX state so later FPU code works
  xor   eax, eax
  add   eax, 1                   // Result := True
  pop   edi
  pop   esi
  pop   ebx
  ret

@ResultFalseExit:
  emms                           // leave MMX state so later FPU code works
  xor   eax, eax                 // Result := False
  pop   edi
  pop   esi
  pop   ebx
  ret
end;

It uses MMX to load two 8-byte blocks into MMX registers and compare them

movq mm0, [edi]
movq mm1, [esi]
pcmpeqb mm1, mm0

The result of the comparison is compressed with this instruction

packsswb mm1, mm1

then moved into an IA-32 register

movd ebx, mm1

where it can be tested

cmp ebx, $FFFFFFFF

always remember

emms

after having used MMX.

Regards
Dennis


.



Relevant Pages