count2.asm
- From: Frank Kotler <fbkotler@xxxxxxxxxxx>
- Date: Mon, 25 Jul 2005 13:58:46 -0400
That thread "about jump tables" is getting old enough to vote. Besides, this version doesn't have a jump table in it. It's still "inspired" by Alex's suggestion that counting characters in a text file might be an example of where a jump table would make sense. But after thinking a bit about what we were actually *doing* with the jump table (with its targets, actually), I thought this approach might work better. It does seem to be faster, counting my 38M "test file" - 1.6+ seconds, vs 2.8+ seconds for "count1" (with the jump table). There's some variation between runs of the same thing - these figures seem to be the most common, repeatable, values... (that's a K6-300 - not the most common processor, these days)
There are a ton more improvements to be made - some I know about, and no doubt some I don't (yet). I'm posting it even if it isn't "ready". Might as well start kicking it around. No use making it "nice" if it's going to have to be rewritten from scratch :)
Best, Frank
global _start
section .bss
; Uninitialised storage. Each definition sits on its own line so that every
; label gets its own reservation.
charcounts      resd    80h             ; one dword tally per character code 00h-7Fh
vowels          resd    1               ; running total of upper + lower case vowels
cons            resd    1               ; running total of upper + lower case consonants
alphas          resd    1               ; running total of vowels + consonants
puncts          resd    1               ; reserved; not written by the code in this file
words           resd    1               ; word tally maintained by the scan loop
fd              resd    1               ; descriptor returned by open
filesize        resd    1               ; file length returned by lseek(SEEK_END)
section .data
; Zero-terminated lists of character codes. Each byte is used by add_em_up
; as an index into charcounts, so a list selects the tallies to be summed.
uppervowels_list db 'AEIOUY', 0
lowervowels_list db 'aeiouy', 0
uppercons_list  db 'BCDFGHJKLMNPQRSTVWXZ', 0
lowercons_list  db 'bcdfghjklmnpqrstvwxz', 0
numbers_list    db '0123456789', 0
puncts_list     db '!"#$%&', "'()*+,-./:;<=>?@[\]^_`{|}~", 0

; Zero-terminated report labels, printed with putz. Each starts with a
; linefeed so that every result appears on its own line.
msg_uppervowels db 10, 'Upper case vowels: ', 0
msg_lowervowels db 10, 'Lower case vowels: ', 0
msg_vowels      db 10, 'Total vowels: ', 0
msg_uppercons   db 10, 'Upper case consonants: ', 0
msg_lowercons   db 10, 'Lower case consonants: ', 0
msg_cons        db 10, 'Total consonants: ', 0
msg_alphas      db 10, 'Total alpha characters: ', 0
msg_numbers     db 10, 'Numerals: ', 0
msg_puncts      db 10, 'Total punctuation: ', 0
msg_words       db 10, 'Total words: ', 0
msg_tabs        db 10, 'Tabs: ', 0
msg_spaces      db 10, 'Spaces: ', 0
msg_crs         db 10, 'Carriage Returns: ', 0
msg_lfs         db 10, 'Linefeeds: ', 0
msg_usage       db 10, 'usage: count filename[.ext]', 10, 0
newline         db 10, 0
section .text

; Linux i386 (int 80h) system-call numbers and constants used by _start.
SYS_EXIT        equ     1
SYS_OPEN        equ     5
SYS_LSEEK       equ     19
SYS_MMAP        equ     90              ; old mmap: ebx -> block of six dword arguments
SEEK_END        equ     2
PROT_READ       equ     1
MAP_SHARED      equ     1
MAX_ERRNO       equ     4095            ; syscall results in [-4095, -1] are -errno

;-----------------------------------------------------------------------
; _start - program entry: count characters in the file named by argv[1]
;
; Syntax: NASM, 32-bit Linux, int 80h system calls.
; In:     [esp] = argc, [esp + 8] = argv[1]
; Flow:   open the file, find its length with lseek(SEEK_END), map it
;         read-only, tally every byte below 80h into charcounts, then
;         print the report and exit.
; Exit status: 0 after a report, 1 for a bad command line, errno when
;         open, lseek or mmap fails (the errno is also printed).
;
; Scan loop registers: esi = next byte, edi = one past the last byte,
;         eax = current byte (upper 24 bits stay zero), ecx = previous byte.
; Word rule: one word if the first byte is not a space, plus one word
;         each time a space follows a byte that is not a space.
; NOTE(review): with this rule a trailing space adds an extra word and
;         tabs/newlines do not separate words - confirm whether intended.
;-----------------------------------------------------------------------
_start:
        nop
        cmp     dword [esp], 2          ; argc must be program name + filename
        jz      good_command_line
        mov     esi, msg_usage
        call    putz
        mov     eax, 1                  ; explicit failure status for bad usage
        jmp     exit

good_command_line:
        mov     ebx, [esp + 8]          ; filename
        xor     ecx, ecx                ; flags = O_RDONLY (0)
        mov     eax, SYS_OPEN
        int     80h
        test    eax, eax
        jns     goodopen

; Shared failure path: eax = -errno on entry. Prints errno, exits with it.
report_errno:
        neg     eax
        call    showeaxd
        jmp     exit

goodopen:
        mov     [fd], eax
        mov     ebx, eax
        xor     ecx, ecx                ; offset 0 ...
        mov     edx, SEEK_END           ; ... from the end = file length
        mov     eax, SYS_LSEEK
        int     80h
        mov     [filesize], eax
        test    eax, eax
        js      report_errno            ; lseek failed (e.g. unseekable input)
        jz      show_results            ; empty file: nothing to map, report zeros
        mov     edi, eax                ; edi = length (turned into end pointer below)

        ; Build the mmap argument block on the stack, last field first.
        push    byte 0                  ; offset
        push    ebx                     ; fd (ebx still holds it from lseek)
        push    byte MAP_SHARED         ; flags
        push    byte PROT_READ          ; protection
        push    eax                     ; length
        push    byte 0                  ; address: let the kernel choose
        mov     ebx, esp                ; pointer to args structure
        mov     eax, SYS_MMAP
        int     80h
        add     esp, 24                 ; "free" args structure
        cmp     eax, -MAX_ERRNO         ; unsigned test: valid addresses may have bit 31 set
        jae     report_errno

        mov     esi, eax                ; esi = first byte of the mapping
        add     edi, eax                ; edi = one past the last byte

        xor     eax, eax                ; "previous byte" starts as 0
        cmp     byte [esi], 20h
        jz      ignore
        add     dword [words], 1        ; file starts inside a word

ignore:
        mov     ecx, eax                ; remember previous byte
        cmp     esi, edi
        jz      show_results
        ; mov   al, [esi]               ; alternative to lodsb, left disabled
        ; inc   esi
        lodsb
        cmp     al, 80h
        jae     ignore                  ; bytes 80h-FFh have no charcounts slot
        add     dword [charcounts + eax * 4], 1
%if 1
        cmp     al, ' '
        jnz     ignore
        cmp     ecx, eax                ; space after a space: same gap
        jz      ignore
        add     dword [words], 1
%endif
        jmp     short ignore

show_results:
        mov     esi, msg_uppervowels
        call    putz
        mov     ebx, uppervowels_list
        call    add_em_up
        add     [vowels], eax
        add     [alphas], eax
        call    showeaxd

        mov     esi, msg_lowervowels
        call    putz
        mov     ebx, lowervowels_list
        call    add_em_up
        add     [vowels], eax
        add     [alphas], eax
        call    showeaxd

        mov     esi, msg_vowels
        call    putz
        mov     eax, [vowels]
        call    showeaxd

        mov     esi, msg_uppercons
        call    putz
        mov     ebx, uppercons_list
        call    add_em_up
        add     [cons], eax
        add     [alphas], eax
        call    showeaxd

        mov     esi, msg_lowercons
        call    putz
        mov     ebx, lowercons_list
        call    add_em_up
        add     [cons], eax
        add     [alphas], eax
        call    showeaxd

        mov     esi, msg_cons
        call    putz
        mov     eax, [cons]
        call    showeaxd

        mov     esi, msg_alphas
        call    putz
        mov     eax, [alphas]
        call    showeaxd

        mov     esi, msg_numbers
        call    putz
        mov     ebx, numbers_list
        call    add_em_up
        call    showeaxd

        mov     esi, msg_words
        call    putz
        mov     eax, [words]
        call    showeaxd

        mov     esi, msg_tabs
        call    putz
        mov     eax, [charcounts + (9 * 4)]
        call    showeaxd

        mov     esi, msg_spaces
        call    putz
        mov     eax, [charcounts + (20h * 4)]
        call    showeaxd

        mov     esi, msg_lfs
        call    putz
        mov     eax, [charcounts + (10 * 4)]
        call    showeaxd

        mov     esi, msg_crs
        call    putz
        mov     eax, [charcounts + (13 * 4)]
        call    showeaxd

        mov     esi, msg_puncts
        call    putz
        mov     ebx, puncts_list
        call    add_em_up
        call    showeaxd

        mov     esi, newline
        call    putz
        call    putz

        ; Per-character dump: "<char> <count> " for every code 21h..7Eh.
        mov     ebx, 21h
show_chars:
        mov     eax, ebx
        call    putc
        mov     al, ' '
        call    putc
        mov     eax, [charcounts + ebx * 4]
        call    showeaxd
        mov     al, ' '
        call    putc
        inc     ebx
        cmp     ebx, '~'
        jbe     show_chars              ; inclusive bound so '~' itself is listed

        mov     esi, newline
        call    putz
        call    putz

        ; No explicit munmap/close: the kernel releases the mapping and
        ; the descriptor when the process exits just below.
        xor     eax, eax                ; report completed: exit status 0

; Terminate the process. In: eax = exit status.
exit:
        mov     ebx, eax
        mov     eax, SYS_EXIT
        int     80h
;-----------------------------
; add_em_up - sum the charcounts tallies selected by a list of indices
; in:  ebx -> zero-terminated list of (byte) indices
;      (each byte indexes charcounts, which has 80h entries, so list
;       bytes are expected to be below 80h)
; out: eax = sum
;      ebx trashed (left pointing at the terminating zero)
;      ecx preserved, flags modified
add_em_up:
        push    ecx
        xor     eax, eax
.top:
        movzx   ecx, byte [ebx]         ; next index, zero-extended
        jecxz   .done                   ; zero byte ends the list
        add     eax, [charcounts + ecx * 4]
        inc     ebx
        jmp     short .top
.done:
        pop     ecx
        ret
;--------------------------
; putc - write one character to stdout
; in:  al = character
; out: nothing; eax, ebx, ecx, edx are all restored
;      (the result of the write call is discarded)
;---------------------------
putc:
        push    edx
        push    ecx
        push    ebx
        push    eax                     ; saved eax doubles as the 1-byte buffer

        mov     eax, 4                  ; __NR_write
        mov     ebx, 1                  ; stdout
        mov     ecx, esp                ; low byte of saved eax = the character
        mov     edx, 1                  ; length
        int     80h

        pop     eax
        pop     ebx
        pop     ecx
        pop     edx
        ret
;-----------------------------
; putz - write a zero-terminated string to stdout
; in:  esi -> string
; out: nothing; eax, ebx, ecx, edx are all restored
;      (the result of the write call is discarded)
;-------------------------
putz:
        push    eax
        push    ebx
        push    ecx
        push    edx

        mov     ecx, esi                ; buffer for write
        or      edx, byte -1            ; edx = -1, so the first probe is [ecx + 0]
.top:
        cmp     byte [ecx + edx + 1], 1 ; carry set only when the byte is 0
        inc     edx                     ; inc leaves the carry flag untouched
        jnc     .top                    ; on exit edx = string length

        mov     ebx, 1                  ; stdout
        mov     eax, 4                  ; __NR_write
        int     80h

        pop     edx
        pop     ecx
        pop     ebx
        pop     eax
        ret
;---------------------------------
;---------------------------------
; showeaxd - write eax to stdout as an unsigned decimal number
; in:  eax = value
; out: nothing; eax, ebx, ecx, edx, esi are all restored
;      (the result of the write call is discarded)
; Digits are generated least-significant first into a 16-byte stack
; buffer, filling it backwards from offset 12; at most 10 digits are
; produced for a 32-bit value, so the buffer cannot underflow.
;---------------------------------
showeaxd:
        push    eax
        push    ebx
        push    ecx
        push    edx
        push    esi

        sub     esp, 10h                ; scratch buffer
        lea     ecx, [esp + 12]         ; ecx walks backwards from here
        mov     ebx, 10                 ; divisor
        xor     esi, esi                ; digit count
        mov     byte [ecx], 0           ; terminator (not written out)
.top:
        dec     ecx
        xor     edx, edx                ; div takes edx:eax as the dividend
        div     ebx                     ; eax = quotient, edx = remainder
        add     dl, '0'
        mov     [ecx], dl
        inc     esi
        or      eax, eax
        jnz     .top                    ; always emits at least one digit

        mov     edx, esi                ; length; ecx already -> first digit
        mov     ebx, 1                  ; stdout
        mov     eax, 4                  ; __NR_write
        int     80h

        add     esp, 10h
        pop     esi
        pop     edx
        pop     ecx
        pop     ebx
        pop     eax
        ret
;---------------------------------
- Follow-Ups:
- Re: count2.asm
- From: randyhyde
- Re: count2.asm
- From: ¬a\\/b
- Re: count2.asm
- From: Alex McDonald
- Re: count2.asm
- From: Herbert Kleebauer
- Re: count2.asm
- From: Robert Redelmeier
- Re: count2.asm
- Prev by Date: Re: Hooking Screen
- Next by Date: Re: count2.asm
- Previous by thread: Intel Syntax Problems.
- Next by thread: Re: count2.asm
- Index(es):
Relevant Pages
|