Re: asm grep



Charles Crayne wrote:
On Thu, 27 Dec 2007 04:49:05 GMT
Robert Redelmeier <redelm@xxxxxxxxxxxxxxx> wrote:


That is pretty easy -- just do a fixed size read (I'd suggest 512
or 4096 bytes). Then I'd just run a REP / SCASB or CMP AL,[ESI+ECX]
/ JNZ on the anchor. Watch for a match in the block-breaks.


Or, better yet, mmap the file and get rid of the block-break code, as
well as the file reads.

I'm likin' that approach! The current "yangrep.asm" runs 282 bytes, and is a *lot* faster than the original. Still not as fast as C grep (on my "big" file), and I removed all the features Konstantin had lovingly added. I'll probably paste most of 'em back in. The option I *use* most, on the real grep, is "i", and that doesn't sit well with rep cmpsb...

Later,
Frank

; nasm -f bin -I/where/ever/asmutils-0.18/inc/ -d__LINUX__ -d__ELF -d__ELF_MACROS__ yangrep.asm
; chmod +x yangrep


%include "system.inc"

;-----------------------------------------------------------------------
; astrlen - inline length of a NUL-terminated string
;
;   astrlen src          eax = length of string at src
;                        (src: register or label, must not be eax)
;   astrlen dest, src    dest = length of string at src
;                        (dest: one of eax/ebx/ecx/edx/esi/edi/ebp,
;                         and different from src)
;
; Clobbers: the destination register and the flags.
;
; How the loop works: the counter starts at -1 and the byte at
; [src + counter + 1] is compared with 1.  That compare sets the carry
; flag only when the byte is 0.  INC leaves the carry flag alone, so
; the JNC after it still sees the result of the compare: the counter is
; bumped once per byte examined and the loop stops right after the
; terminator has been seen, leaving the count of non-zero bytes.
;-----------------------------------------------------------------------
%imacro astrlen 0-*

    ; single operand - eax is implied dest
    %if %0 = 1
        %ifidni %1, eax
            %error "single operand form - eax is implied destination!!!"
        %endif

        or eax, byte -1                 ; counter = -1
    %%getlen:
        cmp [%1 + eax + 1], byte 1      ; CF = 1 only for the terminating 0
        inc eax                         ; does not touch CF
        jnc %%getlen

    ; two operand form
    %elif %0 = 2
        %ifidni %1, %2
            %error "src and dest must not be the same!!!"
        %endif

        ; fake an "%ifnreg": flag the destination as valid only when it
        ; is spelled as one of the seven registers listed below

        %assign %%isreg 0

        %ifidni %1, eax
            %assign %%isreg 1
        %elifidni %1, ebx
            %assign %%isreg 1
        %elifidni %1, ecx
            %assign %%isreg 1
        %elifidni %1, edx
            %assign %%isreg 1
        %elifidni %1, esi
            %assign %%isreg 1
        %elifidni %1, edi
            %assign %%isreg 1
        %elifidni %1, ebp
            %assign %%isreg 1
        %endif

        %if %%isreg = 0
            %error "destination must be a GP register!!!"
        %endif


        or %1, byte -1                  ; counter = -1
    %%getlen2:
        cmp [%1 + %2 + 1], byte 1       ; CF = 1 only for the terminating 0
        inc %1                          ; does not touch CF
        jnc %%getlen2
    %else
        ; any other operand count: report the usage under the macro's real name
        %error "usage: astrlen src (reg/label) or astrlen dest (reg) src (reg/label)."
    %endif
%endm


CODESEG

;-----------------------------------------------------------------------
; yangrep pattern file...
;
; Prints every line of every named file that contains the literal
; pattern.  Exit status is 0 if at least one line was printed, else 1.
;
; Entry: the stack is read as  argc, argv[0], argv[1], ..., NULL  and
;        consumed with POP.
;
; Register roles inside the search loop:
;   esi = pattern (kept for the whole run)
;   edi = current scan position inside the mapped file
;   [bytesleft] = number of bytes from edi to the end of the file
;
; The sys_* macros, _mov, EMPTY, STDIN/STDOUT, __n and the O_*/PROT_*/
; MAP_*/SEEK_* constants are expected to come from system.inc.
; NOTE(review): sys_close and sys_munmap are assumed to be provided
; there like the other sys_* wrappers - confirm.
;-----------------------------------------------------------------------

do_exit:
	sys_exit [retcode]

START:
	cld				; every forward string op below relies on DF = 0
	_mov	ebp,STDIN		; no stdin path exists yet; ebp is reloaded per file
	mov	[retcode],byte 1	; "nothing found" until a line is printed

	pop	ebx			; argc
	dec	ebx
	jz	do_exit			; no arguments at all

	pop	esi			; our name - discard
	pop	esi			; pattern

	astrlen	esi
	mov	[needle_size], eax

.next_file:
	pop	ebx			; next filename pointer
	or	ebx,ebx
	jz	do_exit			; NULL ends the argument list

	sys_open EMPTY,O_RDONLY		; ebx already holds the name
	mov	ebp,eax
	test	eax,eax
	js	.next_file		; could not open: try the next name

	sys_lseek ebp, 0, SEEK_END	; file size
	mov	[filesize], eax
	mov	[bytesleft], eax

	; The descriptor is parked on the stack so that closing it does not
	; depend on which registers the six-argument macro loads.
	push	ebp
	sys_mmap 0, eax, PROT_READ, MAP_PRIVATE, ebp, 0
	mov	[mptr], eax
	pop	ebx
	sys_close EMPTY			; a mapping stays valid after its descriptor is closed

	mov	edi, [mptr]
	cmp	edi, -4096		; -4095..-1 are error returns
	ja	.next_file		; unmappable (e.g. empty) file: skip it, keep going

.matchloop:
	mov	ecx, [bytesleft]
	jecxz	.file_done		; rep with ecx = 0 would leave stale flags
	mov	al, [esi]		; first char of "needle"
	repne scasb
	mov	[bytesleft], ecx	; mov keeps ZF from the scan
	jz	.candidate		; ZF set: first char found, edi is one past it

.file_done:
	sys_munmap [mptr], [filesize]
	jmp	.next_file

.candidate:
	; The needle occupies needle_size bytes starting at edi-1 and ecx+1
	; bytes remain from there.  If it does not fit here it cannot fit
	; further on either, so the file is finished.
	mov	eax, [needle_size]
	dec	eax
	cmp	eax, ecx
	ja	.file_done

	push	edi
	push	esi
	dec	edi			; back onto the first matching byte
	mov	ecx, [needle_size]
	repe cmpsb
	pop	esi
	pop	edi			; pops leave ZF from the compare intact
	jnz	.matchloop		; mismatch: resume right after the first char

	mov	byte [retcode], 0	; found something

	; Locate the start of the line: look for a line feed in the bytes in
	; front of the match, never stepping below the start of the mapping.
	mov	al, __n
	dec	edi			; edi -> first byte of the match
	mov	ecx, edi
	sub	ecx, [mptr]		; ecx = bytes in front of the match
	jz	.line_start		; match at offset 0 starts the line
	dec	edi
	std
	repne scasb
	cld				; cld changes DF only; ZF is still the scan result
	jnz	.no_lf			; none found: edi = mptr - 1
	inc	edi			; found: edi = lf - 1, so step onto the lf ...
.no_lf:
	inc	edi			; ... and one more to the first byte of the line
.line_start:
	mov	edx, edi		; save a copy

	; Locate the end of the line, bounded by the end of the file so a
	; last line without a line feed cannot run off the mapping.
	mov	ecx, [mptr]
	add	ecx, [filesize]
	sub	ecx, edi		; bytes from line start to end of file (>= 1)
	repne scasb
	mov	[bytesleft], ecx	; searching resumes after this line

	mov	ecx, edx		; buffer = start of line
	mov	edx, edi
	sub	edx, ecx		; length, including the lf when there is one
	sys_write STDOUT
	jmp	.matchloop


; Uninitialised data.  UDATASEG and END are presumably section macros
; from system.inc - confirm.
UDATASEG

realoff resd 1 ; not read anywhere in the visible code
retcode resd 1 ; exit status handed to sys_exit: set to 1 at start, 0 once a match is found

filesize resd 1 ; result of the SEEK_END lseek on the current file
bytesleft resd 1 ; count loaded into ecx for the first-character scan
mptr resd 1 ; address returned by sys_mmap for the current file
needle_size resd 1 ; pattern length as computed by astrlen

END
.



Relevant Pages