Re: Hex to ascii




Terje Mathisen wrote:
randyhyde@xxxxxxxxxxxxx wrote:
Terje Mathisen wrote:
Anyway, my algorithm is totally different: It splits the 32-bit binary
number into two 5-digit (decimal) halves, using a reciprocal
multiplication by 1/100000.

Okay, I'll try that and see how it works.

I'm pretty confident it works, I did run an exhaustive test after all. :-)

I've still got to refactor my version a bit, but the interesting thing
is that your algorithm was barely edged out by the "division by repeated
subtraction" algorithm.

Here are the results I got via RDTSC on my PIV:

Terje: 000000CC04E1F460
repeated subtraction: 000000B0960388E8

Now granted, I had to do a data copy with your algorithm so that it
would leave the data in the format I needed it in (and the subtraction
algorithm was able to do without the extra copy), and I suspect that I
can eliminate this copy by refactoring the results, but I was expecting
a *huge* difference in performance and was surprised by the fact that
the two algorithms came out fairly close to one another in performance.
I've posted my code, so see if you can spot any obvious mistakes that
would skew the results.
Cheers,
Randy Hyde

// Program t: test/benchmark harness for unsigned-32-bit to decimal-ASCII
// conversion routines (HLA syntax; uses the HLA standard library).
program t;
#include( "stdlib.hhf" )

static
// Boolean flag, initialised to false. No code shown in this file reads
// or writes it.
OutputUnderscores :boolean := false;

// _unsToBuf32 -
//
//  Converts an unsigned 32-bit integer to decimal ASCII using "division
//  by repeated subtraction": for each digit position the matching power
//  of ten is subtracted until a borrow occurs, counting the digit in DL.
//
//  Inputs:
//      EAX (d)      - value to convert.
//      ECX (width)  - number of digits to emit, 1..10. It selects the
//                     entry point into the digit chain, so it must be at
//                     least the number of significant digits in d.
//                     Width 10 additionally assumes d >= 1_000_000_000
//                     (the leading digit starts at '1' and the first
//                     subtraction is unconditional).
//      EDI (buffer) - address at which the LAST (least significant)
//                     digit is stored; more significant digits are
//                     stored at lower addresses.
//
//  Outputs:
//      EDI          - address of the first (most significant) digit
//                     written, i.e. EDI_in - width + 1.
//
//  Raises:
//      ex.WidthTooBig if width is 0 or greater than 10.
//
//  Modifies EAX, DL and the flags. ECX is left unchanged.
//  No stack frame is built (@noframe); all parameters are in registers.

procedure _unsToBuf32
(
        d       :dword in eax;
        width   :dword in ecx;
    var buffer  :char in edi
);
    @noframe;
    @nodisplay;
    @noalignstack;

readonly(4)

    // Jump table indexed by width (byte offset = width*4).
    // Entry 0 rejects a zero width.

    noUSjt :dword[11] :=
        [
            &badWidth,
            &noUS1,
            &noUS2,
            &noUS3,
            &noUS4,
            &noUS5,
            &noUS6,
            &noUS7,
            &noUS8,
            &noUS9,
            &noUS10
        ];


    // subDigit( subValue, posn ) -
    //
    //  Extracts one decimal digit from EAX. subValue is the power of ten
    //  for this digit position; posn is the distance (in bytes) of the
    //  digit below the address in EDI. The sequence is fully unrolled:
    //  up to ten subtractions, leaving the loop on the first borrow.
    //  The final ADD undoes the subtraction that went one step too far,
    //  so EAX holds the remainder for the next position.

    #macro subDigit( subValue, posn ):done;

        mov( '0', dl );
        sub( subValue, eax );
        jc done;
        add( 1, dl );
        sub( subValue, eax );
        jc done;
        add( 1, dl );
        sub( subValue, eax );
        jc done;
        add( 1, dl );
        sub( subValue, eax );
        jc done;
        add( 1, dl );
        sub( subValue, eax );
        jc done;
        add( 1, dl );
        sub( subValue, eax );
        jc done;
        add( 1, dl );
        sub( subValue, eax );
        jc done;
        add( 1, dl );
        sub( subValue, eax );
        jc done;
        add( 1, dl );
        sub( subValue, eax );
        jc done;
        add( 1, dl );
        sub( subValue, eax );   // Tenth subtraction: digit is '9'.
      done:
        add( subValue, eax );   // Restore the remainder.
        mov( dl, [edi-posn] );

    #endmacro

begin _unsToBuf32;

    // Validate the width, then jump straight to the code for the most
    // significant digit requested; execution falls through the
    // remaining, less significant positions.

    cmp( ecx, 10 );
    ja badWidth;

    jmp( noUSjt[ecx*4] );

    // Billions digit. A 32-bit value is at most 4_294_967_295, so only
    // the digits '1'..'4' are handled here.

  noUS10:
    mov( '1', dl );
    sub( 1_000_000_000, eax );
    sub( 1_000_000_000, eax );
    jc done10;
    add( 1, dl );
    sub( 1_000_000_000, eax );
    jc done10;
    add( 1, dl );
    sub( 1_000_000_000, eax );
    jc done10;
    add( 1, dl );
    sub( 1_000_000_000, eax );
  done10:
    add( 1_000_000_000, eax );
    mov( dl, [edi-9] );

  noUS9:
    subDigit( 100_000_000, 8 );
  noUS8:
    subDigit( 10_000_000, 7 );
  noUS7:
    subDigit( 1_000_000, 6 );
  noUS6:
    subDigit( 100_000, 5 );
  noUS5:
    subDigit( 10_000, 4 );
  noUS4:
    subDigit( 1_000, 3 );
  noUS3:
    subDigit( 100, 2 );
  noUS2:
    subDigit( 10, 1 );

    // Units digit: whatever is left in AL is the final digit.

  noUS1:
    or( '0', al );
    mov( al, [edi] );

    // Point EDI at the first char in the buffer (EDI - width + 1).

    sub( ecx, edi );
    add( 1, edi );
    ret();


  badWidth:
    raise( ex.WidthTooBig );

end _unsToBuf32;


// __unsToBuf32 -
//
//  Converts an unsigned 32-bit integer to decimal ASCII using Terje
//  Mathisen's reciprocal-multiplication algorithm. The value is split
//  into two 5-digit halves (value / 100_000 and the remainder); both
//  halves are then converted in parallel into a local 10-character
//  buffer, and the last `width` characters of that buffer are copied to
//  the caller's buffer.
//
//  Inputs:
//      d      (stack) - value to convert.
//      width  (stack) - number of digits to copy out, 1..10. All ten
//                       digits are always generated; width only selects
//                       how many trailing digits are copied.
//      EDI (buffer)   - address at which the LAST (least significant)
//                       digit is stored; earlier digits are stored at
//                       lower addresses.
//
//  Outputs:
//      EDI            - address of the first digit copied, i.e.
//                       EDI_in - width + 1.
//
//  Raises:
//      ex.WidthTooBig if width is 0 or greater than 10.
//
//  EAX, EBX, ECX and EDX are saved and restored. The procedure is
//  @noframe, so the stack frame is built and torn down by hand and the
//  stacked parameters are removed by ret( _parms_ ).

procedure __unsToBuf32
(
        d       :dword;
        width   :dword;
    var buffer  :char in edi
);
    @noframe;
    @nodisplay;
    @noalignstack;

var
    // asciiBuf[0..4] receive the digits of the high half,
    // asciiBuf[5..9] the digits of the low half.
    asciiBuf :char[12];

readonly(4)

    // Jump table indexed by width (byte offset = width*4).
    // Entry 0 rejects a zero width.

    noUSjt :dword[11] :=
        [
            &badWidth,
            &noUS1,
            &noUS2,
            &noUS3,
            &noUS4,
            &noUS5,
            &noUS6,
            &noUS7,
            &noUS8,
            &noUS9,
            &noUS10
        ];

begin __unsToBuf32;

    // Manual prologue: frame pointer, locals, then the registers this
    // routine modifies.

    push( ebp );
    mov( esp, ebp );
    sub( _vars_, esp );
    push( eax );
    push( ebx );
    push( ecx );
    push( edx );

    // Terje Mathisen
    //
    // Step 1: split d into high = d / 100_000 (ECX) and
    // low = d - high*100_000 (EBX).

    mov( d, ebx );
    mov( 2814749767, eax );
    mul( ebx );             // Reciprocal (2^n / 1e5) MUL
    shr( 1, ebx );
    xor( ecx,ecx );
    add( ebx, eax );        // Add d/2 to the low half of the product...
    adc( edx, ecx );        // ...and carry into the high half.
    mov( 100_000, eax );
    shr( 16, ecx );         // ECX = high part
    mov( d, ebx );
    intmul( ecx, eax );     // High part * 100k
    sub( eax, ebx );        // EBX = low part

    // Step 2: multiply each half by 429497 (approximately 2^32/10_000).
    // EDX receives the leading digit of the half and EAX a binary
    // fraction from which the remaining four digits are extracted.

    mov( 429497, eax );
    mul( ecx );
    mov( eax, ecx );        // ECX = fraction for the high half
    add( '0', dl );
    mov( 429497, eax );
    mov( dl, asciiBuf[0] );

    mul( ebx );
    mov( eax, ebx );        // EBX = fraction for the low half
    add( 7, ecx );
    shr( 3, ecx );          // Round up and drop 3 bits of the fraction.
    add( '0', dl );
    mov( dl, asciiBuf[5] );

    // Step 3: each round multiplies the fraction by 5 (LEA) and reads
    // the digit one bit position lower than in the previous round
    // (28, 27, 26, 25), then masks the digit off before the next round.

    add( 7, ebx );
    shr( 3, ebx );
    lea( ecx, [ecx+ecx*4] );
    mov( ecx, edx );
    and( $fff_ffff, ecx );
    shr( 28, edx );
    lea( ebx, [ebx+ebx*4] );
    add( '0', dl );
    mov( ebx, eax );
    shr( 28, eax );
    mov( dl, asciiBuf[1] );

    and( $fffffff, ebx );
    add( '0', al );
    mov( al, asciiBuf[6] );

    lea( ecx,[ecx+ecx*4] );
    lea( ebx,[ebx+ebx*4] );
    mov( ecx, edx );
    mov( ebx, eax );
    and( $7ffffff, ecx );
    shr( 27, edx );
    and( $7ffffff, ebx );
    shr( 27, eax );
    add( '0', dl );
    add( '0', al );
    mov( dl, asciiBuf[2] );
    mov( al, asciiBuf[7] );

    lea( ecx,[ecx+ecx*4] );
    lea( ebx,[ebx+ebx*4] );
    mov( ecx, edx );
    mov( ebx, eax );
    and( $3ffffff, ecx );
    shr( 26, edx );
    and( $3ffffff, ebx );
    shr( 26, eax );
    add( '0', dl );
    add( '0', al);
    mov( dl, asciiBuf[3] );
    mov( al, asciiBuf[8] );
    lea( ecx, [ecx+ecx*4] );
    shr( 25, ecx );
    lea( ebx, [ebx+ebx*4] );
    shr( 25, ebx );
    add( '0', cl );
    add( '0', bl );
    mov( cl, asciiBuf[4] );
    mov( bl, asciiBuf[9] );

    // Step 4: copy the last `width` digits to the caller's buffer so
    // that the final digit lands at [EDI], then move EDI back to the
    // first digit copied.

    mov( width, ecx );
    cmp( ecx, 10 );
    ja badWidth;

    jmp( noUSjt[ecx*4] );

  noUS10:
    mov( (type dword asciiBuf[0]), ebx );
    mov( (type dword asciiBuf[4]), eax );
    mov( (type word asciiBuf[8]), cx );
    mov( ebx, [edi-9] );
    mov( eax, [edi-5] );
    mov( cx, [edi-1] );
    sub( 9, edi );
    jmp done;

  noUS9:
    mov( (type byte asciiBuf[1]), cl );
    mov( (type word asciiBuf[2]), bx );
    mov( (type dword asciiBuf[4]), eax );
    mov( (type word asciiBuf[8]), dx );
    mov( cl, [edi-8] );
    mov( bx, [edi-7] );
    mov( eax, [edi-5] );
    mov( dx, [edi-1] );
    sub( 8, edi );
    jmp done;

  noUS8:
    mov( (type dword asciiBuf[2]), ebx );
    mov( (type dword asciiBuf[6]), eax );
    mov( ebx, [edi-7] );
    mov( eax, [edi-3] );
    sub( 7, edi );
    jmp done;

  noUS7:
    mov( (type byte asciiBuf[3]), cl );
    mov( (type word asciiBuf[4]), bx );
    mov( (type dword asciiBuf[6]), eax );
    mov( cl, [edi-6] );
    mov( bx, [edi-5] );
    mov( eax, [edi-3] );
    sub( 6, edi );
    jmp done;

  noUS6:
    mov( (type word asciiBuf[4]), bx );
    mov( (type dword asciiBuf[6]), eax );
    mov( bx, [edi-5] );
    mov( eax, [edi-3] );
    sub( 5, edi );
    jmp done;

  noUS5:
    mov( (type byte asciiBuf[5]), bl );
    mov( (type dword asciiBuf[6]), eax );

    // Store exactly the one byte that was loaded. A 16-bit store of BX
    // here would also write the unrelated contents of BH to [edi-3] and
    // only work because the dword store below overwrites that byte.

    mov( bl, [edi-4] );
    mov( eax, [edi-3] );
    sub( 4, edi );
    jmp done;

  noUS4:
    mov( (type dword asciiBuf[6]), eax );
    mov( eax, [edi-3] );
    sub( 3, edi );
    jmp done;


  noUS3:
    mov( (type byte asciiBuf[7]), bl );
    mov( (type word asciiBuf[8]), ax );
    mov( bl, [edi-2] );
    mov( ax, [edi-1] );
    sub( 2, edi );
    jmp done;

  noUS2:
    mov( (type word asciiBuf[8]), ax );
    mov( ax, [edi-1] );
    sub( 1, edi );
    jmp done;

  noUS1:
    mov( (type byte asciiBuf[9]), al );
    mov( al, [edi] );


    // Manual epilogue: restore registers in reverse push order, release
    // the frame, and pop the stacked parameters.

  done:
    pop( edx );
    pop( ecx );
    pop( ebx );
    pop( eax );
    leave();
    ret( _parms_ );


  badWidth:
    raise( ex.WidthTooBig );

end __unsToBuf32;







// Main program: runs both conversion routines over every 32-bit value
// (ECX counts from 0 until it wraps back to 0), accumulates the RDTSC
// cycle counts spent in each, and checks that both produce identical
// 16-byte output buffers.

var
    start   :dword[2];      // 64-bit RDTSC reading taken before a call
    time1   :dword[2];      // 64-bit cycle total for __unsToBuf32
    time2   :dword[2];      // 64-bit cycle total for _unsToBuf32
    buffer  :byte[16];      // output of __unsToBuf32
    buffer2 :byte[16];      // output of _unsToBuf32

begin t;

    // Fill both output buffers with spaces so that bytes neither routine
    // writes still compare equal.

    mov( 16, ecx );
    lea( edi, buffer );
    mov( ' ', al );
    rep.stosb();
    mov( 16, ecx );
    lea( edi, buffer2 );
    mov( ' ', al );
    rep.stosb();

    // Clear both 64-bit accumulators. Array indices are byte offsets,
    // so [4] addresses the high dword.

    mov( 0, time1 );
    mov( 0, time1[4] );
    mov( 0, time2 );
    mov( 0, time2[4] );

    // ECX is the value under test. Start at zero explicitly rather than
    // depending on the count left behind by rep.stosb.

    xor( ecx, ecx );

  loopit0:

    // Digit count of the current value -> EBX (the width argument).
    // NOTE(review): the explicit mov to EAX looks redundant because the
    // value is passed to conv.u32Size as an argument - confirm against
    // the library's declaration before removing it.

    lea( edi, buffer[15] );     // EDI -> last byte of buffer
    mov( ecx, eax );
    conv.u32Size( ecx );
    mov( eax, ebx );

    // Time the reciprocal-multiplication routine.

    rdtsc();
    mov( eax, start );
    mov( edx, start[4] );
    __unsToBuf32( ecx, ebx, [edi] );
    rdtsc();
    sub( start, eax );
    sbb( start[4], edx );
    add( eax, time1 );
    adc( edx, time1[4] );

    // Time the repeated-subtraction routine. It receives its width in
    // ECX, so the loop counter is preserved around the call.

    push( ecx );
    lea( edi, buffer2[15] );    // EDI -> last byte of buffer2
    rdtsc();
    mov( eax, start );
    mov( edx, start[4] );
    _unsToBuf32( ecx, ebx, [edi] );
    rdtsc();
    sub( start, eax );
    sbb( start[4], edx );
    add( eax, time2 );
    adc( edx, time2[4] );
    pop( ecx );

    // The two 16-byte buffers must match, compared a dword at a time.

    mov( (type dword buffer), eax );
    cmp( eax, (type dword buffer2) );
    jne Failure;
    mov( (type dword buffer[4]), eax );
    cmp( eax, (type dword buffer2[4]) );
    jne Failure;
    mov( (type dword buffer[8]), eax );
    cmp( eax, (type dword buffer2[8]) );
    jne Failure;
    mov( (type dword buffer[12]), eax );
    cmp( eax, (type dword buffer2[12]) );
    jne Failure;

    // Next value; the loop ends when ECX wraps around to zero.

    add( 1, ecx );
    jnz loopit0;

    // Report each 64-bit total, high dword first.

    stdout.put( "Time1: " );
    stdout.putd( time1[4] );
    stdout.putd( time1[0] );
    stdout.newln();
    stdout.put( "Time2: " );
    stdout.putd( time2[4] );
    stdout.putd( time2[0] );
    stdout.newln();
    exit t;

    // Reached when the two routines disagree; ECX is the offending value.

  Failure:
    stdout.put( "Failed to compare at ", ecx, nl );



end t;

.



Relevant Pages

  • Re: Macro2D
    ... õ1, eax", 0 ... db "push striIIi", NL ... db ".2: cmp edi, ebp", NL ... db ".4: mov eax, edi", NL ...
    (alt.lang.asm)
  • Re: Help understanding uops, etc...
    ... get the stream parameter into EAX to fill stack variables ... mov eax, DWORD PTR _pStream$ ... mov DWORD PTR _in$, ecx ...
    (comp.lang.asm.x86)
  • Re: Optimization Questions
    ... mov ecx,; ... mov eax, ebx ... instructions go through port 0 and port 1. ...
    (comp.lang.asm.x86)
  • Re: Optimization Questions
    ... instructions go through port 0 and port 1. ... Pre-read the value in EAX ... mov, ax ... mov, ecx ...
    (comp.lang.asm.x86)
  • Re: Optimization Questions
    ... mov ecx,; ... sub edi, ecx ... mov eax, ebx ...
    (comp.lang.asm.x86)