count2.asm



That thread "about jump tables" is getting old enough to vote. Besides, this version doesn't have a jump table in it. It's still "inspired" by Alex's suggestion that counting characters in a text file might be an example of where a jump table would make sense. But after thinking a bit about what we were actually *doing* with the jump table (with its targets, actually), I thought this approach might work better. It does seem to be faster: counting my 38M "test file" takes 1.6+ seconds, vs 2.8+ seconds for "count1" (the jump-table version). There's some variation between runs of the same thing - these figures seem to be the most common, repeatable values... (that's on a K6-300 - not the most common processor these days)

There are a ton more improvements to be made - some I know about, and no doubt some I don't (yet). I'm posting it even if it isn't "ready". Might as well start kicking it around. No use making it "nice" if it's going to have to be rewritten from scratch :)

Best,
Frank

global _start

section .bss
    ; One dword counter per 7-bit character code, indexed by the byte value.
    charcounts  resd 80h

    ; Category totals. (puncts is reserved here but is not referenced by
    ; the code in this file.)
    vowels      resd 1
    cons        resd 1
    alphas      resd 1
    puncts      resd 1
    words       resd 1

    fd          resd 1      ; descriptor returned by open
    filesize    resd 1      ; result of lseek(fd, 0, SEEK_END)


section .data

    ; Zero-terminated lists of character codes. Each byte is used as an
    ; index into charcounts when a category is summed.
    uppervowels_list db 'AEIOUY', 0
    lowervowels_list db 'aeiouy', 0
    uppercons_list   db 'BCDFGHJKLMNPQRSTVWXZ', 0
    lowercons_list   db 'bcdfghjklmnpqrstvwxz', 0
    numbers_list     db '0123456789', 0
    puncts_list      db '!"#$%&', "'()*+,-./:;<=>?@[\]^_`{|}~", 0

    ; Report labels: each starts with a linefeed and is zero-terminated.
    msg_uppervowels  db 10, 'Upper case vowels: ', 0
    msg_lowervowels  db 10, 'Lower case vowels: ', 0
    msg_vowels       db 10, 'Total vowels: ', 0
    msg_uppercons    db 10, 'Upper case consonants: ', 0
    msg_lowercons    db 10, 'Lower case consonants: ', 0
    msg_cons         db 10, 'Total consonants: ', 0
    msg_alphas       db 10, 'Total alpha characters: ', 0
    msg_numbers      db 10, 'Numerals: ', 0
    msg_puncts       db 10, 'Total punctuation: ', 0
    msg_words        db 10, 'Total words: ', 0
    msg_tabs         db 10, 'Tabs: ', 0
    msg_spaces       db 10, 'Spaces: ', 0
    msg_crs          db 10, 'Carriage Returns: ', 0
    msg_lfs          db 10, 'Linefeeds: ', 0

    msg_usage db 10, 'usage: count filename[.ext]', 10, 0

    newline db 10, 0



section .text
;-----------------------------------------------------------------------
; _start - program entry (32-bit Linux, int 80h system calls)
;
; Expects exactly one command-line argument: the file to examine.
; Maps the file read-only, tallies every byte below 80h into charcounts,
; keeps a rough word count, prints the report, then unmaps and closes.
;
; Exit status: 0 after a report, 1 after the usage message, otherwise
; the errno of the failing open/lseek/mmap (which is also printed).
;
; Register roles during the scan:
;   esi = read pointer, edi = one past the last mapped byte,
;   eax = current byte (zero-extended), ecx = previous byte.
; edi is not touched by any helper, so it still marks the end of the
; mapping when the cleanup code runs.
;-----------------------------------------------------------------------
_start:
    nop

    cmp dword [esp], 2          ; argc: program name + one filename
    jz good_command_line
    mov esi, msg_usage
    call putz
    mov eax, 1                  ; exit status for a usage error
    jmp exit

good_command_line:
    mov ebx, [esp + 8]          ; filename (argv[1])
    xor ecx, ecx                ; O_RDONLY
    mov eax, 5                  ; __NR_open
    int 80h
    or eax, eax
    jns goodopen

syscall_failed:
    ; eax = -errno from the kernel: print errno and exit with it.
    neg eax
    call showeaxd
    jmp exit

goodopen:
    mov [fd], eax
    mov ebx, eax
    xor ecx, ecx                ; offset 0
    mov edx, 2                  ; SEEK_END
    mov eax, 19                 ; __NR_lseek
    int 80h
    or eax, eax
    js syscall_failed

    mov [filesize], eax
    mov edi, eax
    test eax, eax
    jz show_results             ; empty file: nothing to map, report zeros

    ; Build the six-dword argument block for mmap on the stack, last
    ; field first, so that esp ends up pointing at the first field.
    push byte 0                 ; offset
    push ebx                    ; fd (ebx still holds it from the lseek call)
    push byte 1                 ; MAP_SHARED
    push byte 1                 ; PROT_READ
    push eax                    ; size
    push byte 0                 ; addr: no placement hint

    mov ebx, esp                ; pointer to args structure
    mov eax, 90                 ; __NR_mmap
    int 80h
    add esp, 24                 ; "free" args structure
    cmp eax, -4096
    ja syscall_failed           ; -4095..-1 is an error code, not an address

    mov esi, eax
    add edi, eax
    xor eax, eax                ; lodsb only writes al; keep the rest zero

    ; NOTE(review): word counting is a heuristic. It adds one if the
    ; first byte is not a space, then one for every space whose previous
    ; byte is not a space. Only 20h separates words, so a trailing space
    ; adds an extra word and newlines/tabs do not split words.
    cmp byte [esi], 20h
    jz ignore
    add dword [words], 1
ignore:
    mov ecx, eax                ; remember the previous byte (0 at start)
    cmp esi, edi
    jz show_results

    lodsb                       ; same as: mov al, [esi] / inc esi
    cmp al, 80h
    jae ignore                  ; bytes outside the 80h-entry table are skipped
    add dword [charcounts + eax * 4], 1
%if 1
    cmp al, ' '
    jnz ignore
    cmp ecx, eax
    jz ignore                   ; second space of a run: not a new word
    add dword [words], 1
%endif
    jmp short ignore

show_results:

    mov esi, msg_uppervowels
    call putz
    mov ebx, uppervowels_list
    call add_em_up
    add [vowels], eax
    add [alphas], eax
    call showeaxd

    mov esi, msg_lowervowels
    call putz
    mov ebx, lowervowels_list
    call add_em_up
    add [vowels], eax
    add [alphas], eax
    call showeaxd

    mov esi, msg_vowels
    call putz
    mov eax, [vowels]
    call showeaxd

    mov esi, msg_uppercons
    call putz
    mov ebx, uppercons_list
    call add_em_up
    add [cons], eax
    add [alphas], eax
    call showeaxd

    mov esi, msg_lowercons
    call putz
    mov ebx, lowercons_list
    call add_em_up
    add [cons], eax
    add [alphas], eax
    call showeaxd

    mov esi, msg_cons
    call putz
    mov eax, [cons]
    call showeaxd

    mov esi, msg_alphas
    call putz
    mov eax, [alphas]
    call showeaxd

    mov esi, msg_numbers
    call putz
    mov ebx, numbers_list
    call add_em_up
    call showeaxd

    mov esi, msg_words
    call putz
    mov eax, [words]
    call showeaxd

    mov esi, msg_tabs
    call putz
    mov eax, [charcounts + (9 * 4)]
    call showeaxd

    mov esi, msg_spaces
    call putz
    mov eax, [charcounts + (20h * 4)]
    call showeaxd

    mov esi, msg_lfs
    call putz
    mov eax, [charcounts + (10 * 4)]
    call showeaxd

    mov esi, msg_crs
    call putz
    mov eax, [charcounts + (13 * 4)]
    call showeaxd

    mov esi, msg_puncts
    call putz
    mov ebx, puncts_list
    call add_em_up
    call showeaxd

    mov esi, newline
    call putz
    call putz

    ; Per-character table: "<char> <count> " for every code 21h..'~'.
    mov ebx, 21h
show_chars:
    mov eax, ebx
    call putc
    mov al, ' '
    call putc
    mov eax, [charcounts + ebx * 4]
    call showeaxd
    mov al, ' '
    call putc
    inc ebx
    cmp ebx, '~'
    jbe show_chars              ; inclusive bound so '~' itself is listed

    mov esi, newline
    call putz
    call putz

    ; unmap file (skipped when the file was empty and nothing was mapped)
    mov ecx, [filesize]
    jecxz close_file
    mov ebx, edi
    sub ebx, ecx                ; mapping base = end pointer - size
    mov eax, 91                 ; __NR_munmap
    int 80h

close_file:
    mov ebx, [fd]
    mov eax, 6                  ; __NR_close
    int 80h

    xor eax, eax                ; success status

    ; in: eax = process exit status
exit:
    mov ebx, eax
    mov eax, 1                  ; __NR_exit
    int 80h

;-----------------------------
; add_em_up - sum the charcounts entries selected by a list
; in ebx -> zero-terminated list of (byte) indices
; out eax = sum
; ebx trashed, ecx preserved

add_em_up:

    push ecx
    xor eax, eax
.top:
    movzx ecx, byte [ebx]       ; next index; a zero byte ends the list
    jecxz .done
    add eax, [charcounts + ecx * 4]
    inc ebx
    jmp short .top
.done:
    pop ecx
    ret
;--------------------------

;---------------------------
; putc - write the byte in al to file descriptor 1
; in  al = character
; out nothing; eax, ebx, ecx and edx are all restored
putc:
    push edx
    push ecx
    push ebx
    push eax                    ; low byte of this stack slot is the character

    mov edx, 1                  ; count: one byte
    mov ecx, esp                ; buffer: the eax copy just pushed
    mov ebx, edx                ; fd 1
    mov eax, 4                  ; __NR_write
    int 80h

    pop eax                     ; discards the result of the write call
    pop ebx
    pop ecx
    pop edx
    ret
;-----------------------------



;-------------------------
; putz - write a zero-terminated string to file descriptor 1
; in  esi -> string
; out nothing; eax, ebx, ecx and edx are all restored
putz:
    push eax
    push ebx
    push ecx
    push edx

    mov ecx, esi
    or edx, byte -1             ; start at -1 so the first probe is [ecx + 0]
.top:
    cmp byte [ecx + edx + 1], 1 ; carry is set only when the byte is 0
    inc edx                     ; inc leaves the carry flag untouched
    jnc .top                    ; on exit edx = length without the terminator

    mov ebx, 1
    mov eax, 4                  ; __NR_write
    int 80h

    pop edx
    pop ecx
    pop ebx
    pop eax
    ret
;---------------------------------

;---------------------------------
; showeaxd - write eax as an unsigned decimal number to file descriptor 1
; in  eax = value
; out nothing; eax, ebx, ecx, edx and esi are all restored
showeaxd:
    push eax
    push ebx
    push ecx
    push edx
    push esi

    sub esp, 10h                ; scratch buffer for the digits
    lea ecx, [esp + 12]
    mov ebx, 10                 ; divisor
    xor esi, esi                ; digit count
    mov byte [ecx], 0
.top:
    dec ecx                     ; digits are stored right to left
    xor edx, edx                ; div divides edx:eax, so clear the high half
    div ebx
    add dl, '0'                 ; remainder -> ASCII digit
    mov [ecx], dl
    inc esi
    or eax, eax
    jnz .top                    ; always emits at least one digit, even for 0

    mov edx, esi                ; length; ecx already points at the first digit
    mov ebx, 1
    mov eax, 4                  ; __NR_write
    int 80h

    add esp, 10h

    pop esi
    pop edx
    pop ecx
    pop ebx
    pop eax

    ret
;---------------------------------
.



Relevant Pages