Re: Hex to ascii
- From: "randyhyde@xxxxxxxxxxxxx" <spamtrap@xxxxxxxxxx>
- Date: 24 Sep 2006 09:22:09 -0700
Terje Mathisen wrote:
randyhyde@xxxxxxxxxxxxx wrote:
Terje Mathisen wrote:
Anyway, my algorithm is totally different: It splits the 32-bit binary
number into two 5-digit (decimal) halves, using a reciprocal
multiplication by 1/100000.
Okay, I'll try that and see how it works.
I'm pretty confident it works, I did run an exhaustive test after all. :-)
I've still got to refactor my version a bit, but the interesting thing
is that your algorithm was barely edged out by the "division by repeated
subtraction" algorithm.
Here are the results I got via RDTSC on my PIV:
Terje: 000000CC04E1F460
repeated subtraction: 000000B0960388E8
Now granted, I had to do a data copy with your algorithm so that it
would leave the data in the format I needed it in (and the subtraction
algorithm was able to do without the extra copy), and I suspect that I
can eliminate this copy by refactoring the results, but I was expecting
a *huge* difference in performance and was surprised by the fact that
the two algorithms came out fairly close to one another in performance.
I've posted my code, so see if you can spot any obvious mistakes that
would skew the results.
Cheers,
Randy Hyde
// Benchmark program: converts unsigned 32-bit values to decimal text with
// two different routines (repeated subtraction vs. reciprocal
// multiplication), accumulates RDTSC cycle counts for each, and checks
// that both routines produce the same characters.
program t;
// HLA Standard Library declarations; the code below uses the stdout,
// conv and ex namespaces from it.
#include( "stdlib.hhf" )
static
// NOTE(review): not referenced anywhere in this listing; presumably a
// leftover from the library routine this test was cut down from.
OutputUnderscores :boolean := false;
// _unsToBuf32 -
//
//  Converts an unsigned 32-bit integer to decimal ASCII by repeated
//  subtraction of powers of ten ("division by repeated subtraction").
//
//  Inputs:
//      EAX (d)      - value to convert.
//      ECX (width)  - number of digits to emit, 1..10.
//      EDI (buffer) - address of the LAST character position of the
//                     output field; digits are stored at
//                     [EDI-(width-1)] .. [EDI].
//
//  Outputs:
//      EDI          - address of the first (most significant) digit,
//                     i.e. EDI(in) - width + 1.
//
//  Clobbers: EAX, EDX (DL), flags.  ECX is preserved.
//
//  Raises ex.WidthTooBig if width is 0 or greater than 10.
//
//  Preconditions (not checked):
//      * The value must fit in `width` digits; each subDigit expansion
//        can only count a single decimal digit (0..9).
//      * When width = 10 the value must be >= 1_000_000_000: the
//        10-digit path starts the leading digit at '1' and performs the
//        first subtraction without testing for a borrow.
//
//  The procedure is @noframe with all parameters in registers, so it
//  returns with a plain ret().

procedure _unsToBuf32
(
        d       :dword in eax;
        width   :dword in ecx;
    var buffer  :char in edi
);
    @noframe;
    @nodisplay;
    @noalignstack;

readonly(4)

    // Jump table indexed by width.  Entry 0 rejects a zero width;
    // entry N enters the digit chain at the point where N digits
    // remain to be produced.

    noUSjt :dword[11] :=
        [
            &badWidth,
            &noUS1,
            &noUS2,
            &noUS3,
            &noUS4,
            &noUS5,
            &noUS6,
            &noUS7,
            &noUS8,
            &noUS9,
            &noUS10
        ];

    // subDigit( subValue, posn ) -
    //
    //  Produces one decimal digit.  Subtracts subValue from EAX until a
    //  borrow occurs (at most ten subtractions), counting the successful
    //  subtractions in DL starting from '0'.  The final, failed
    //  subtraction is undone at `done`, leaving the remainder in EAX,
    //  and the digit character is stored at [EDI-posn].
    //
    //  After the ninth successful subtraction DL is '9'; the tenth
    //  subtraction is not tested because control reaches `done` either
    //  way.

    #macro subDigit( subValue, posn ):done;

        mov( '0', dl );
        sub( subValue, eax );
        jc done;
        add( 1, dl );
        sub( subValue, eax );
        jc done;
        add( 1, dl );
        sub( subValue, eax );
        jc done;
        add( 1, dl );
        sub( subValue, eax );
        jc done;
        add( 1, dl );
        sub( subValue, eax );
        jc done;
        add( 1, dl );
        sub( subValue, eax );
        jc done;
        add( 1, dl );
        sub( subValue, eax );
        jc done;
        add( 1, dl );
        sub( subValue, eax );
        jc done;
        add( 1, dl );
        sub( subValue, eax );
        jc done;
        add( 1, dl );
        sub( subValue, eax );
      done:
        add( subValue, eax );       // Undo the subtraction that borrowed.
        mov( dl, [edi-posn] );

    #endmacro

begin _unsToBuf32;

    // This version never inserts underscores between groups of three
    // digits; it only validates the width and dispatches on it.

    cmp( ecx, 10 );
    ja badWidth;
    jmp( noUSjt[ecx*4] );

    // Ten digits: a 32-bit value is at most 4_294_967_295, so the
    // leading digit is 1..4.  The first subtraction is unconditional
    // (see the preconditions in the header).

    noUS10:
        mov( '1', dl );
        sub( 1_000_000_000, eax );
        sub( 1_000_000_000, eax );
        jc done10;
        add( 1, dl );
        sub( 1_000_000_000, eax );
        jc done10;
        add( 1, dl );
        sub( 1_000_000_000, eax );
        jc done10;
        add( 1, dl );
        sub( 1_000_000_000, eax );
      done10:
        add( 1_000_000_000, eax );  // Undo the subtraction that borrowed.
        mov( dl, [edi-9] );

    // Each label below falls through into the next-lower digit.

    noUS9:
        subDigit( 100_000_000, 8 );

    noUS8:
        subDigit( 10_000_000, 7 );

    noUS7:
        subDigit( 1_000_000, 6 );

    noUS6:
        subDigit( 100_000, 5 );

    noUS5:
        subDigit( 10_000, 4 );

    noUS4:
        subDigit( 1_000, 3 );

    noUS3:
        subDigit( 100, 2 );

    noUS2:
        subDigit( 10, 1 );

    // The remainder in EAX is now the units digit (0..9).

    noUS1:
        or( '0', al );
        mov( al, [edi] );

        // Point EDI at the first char in the buffer:
        // EDI = EDI - width + 1.

        sub( ecx, edi );
        add( 1, edi );
        ret();

    badWidth:
        raise( ex.WidthTooBig );

end _unsToBuf32;
// __unsToBuf32 -
//
//  Converts an unsigned 32-bit integer to decimal ASCII using Terje
//  Mathisen's reciprocal-multiplication algorithm: the value is split
//  into two 5-digit halves (d / 100_000 and d mod 100_000) and both
//  halves are converted in parallel with scaled-fraction arithmetic.
//  All ten digits are always generated into a local buffer; the last
//  `width` of them are then copied to the caller's buffer.
//
//  Inputs:
//      d      (stack) - value to convert.
//      width  (stack) - number of digits to copy out, 1..10.
//      EDI (buffer)   - address of the LAST character position of the
//                       output field; digits are stored at
//                       [EDI-(width-1)] .. [EDI].
//
//  Outputs:
//      EDI            - address of the first digit stored,
//                       i.e. EDI(in) - width + 1.
//
//  Preserves EAX, EBX, ECX, EDX.  Removes its two stack parameters on
//  return ( ret( _parms_ ) ).
//
//  Raises ex.WidthTooBig if width is 0 or greater than 10.  The check is
//  made after the digits have been generated.
//
//  If the value has fewer than `width` digits, the extra leading
//  positions receive '0' characters.

procedure __unsToBuf32
(
        d       :dword;
        width   :dword;
    var buffer  :char in edi
);
    @noframe;
    @nodisplay;
    @noalignstack;

var
    // Holds all ten generated digits: [0..4] = high half,
    // [5..9] = low half.  Bytes 10..11 are never written.

    asciiBuf :char[12];

readonly(4)

    // Jump table indexed by width; entry 0 rejects a zero width.

    noUSjt :dword[11] :=
        [
            &badWidth,
            &noUS1,
            &noUS2,
            &noUS3,
            &noUS4,
            &noUS5,
            &noUS6,
            &noUS7,
            &noUS8,
            &noUS9,
            &noUS10
        ];

begin __unsToBuf32;

    // @noframe: build the activation record by hand so that d, width
    // and asciiBuf can be addressed through EBP.

    push( ebp );
    mov( esp, ebp );
    sub( _vars_, esp );
    push( eax );
    push( ebx );
    push( ecx );
    push( edx );

    // Terje Mathisen
    //
    // Step 1: split d into high = d / 100_000 and low = d mod 100_000.
    // 2814749767 is 2^48 / 100_000 truncated to an integer; adding d/2
    // to the low half of the product is a rounding correction, and the
    // final shift by 16 of the upper dword completes the 48-bit shift.

    mov( d, ebx );
    mov( 2814749767, eax );
    mul( ebx );                 // Reciprocal (2^n / 1e5) MUL
    shr( 1, ebx );
    xor( ecx,ecx );
    add( ebx, eax );
    adc( edx, ecx );
    mov( 100_000, eax );
    shr( 16, ecx );             // ECX = high part
    mov( d, ebx );
    intmul( ecx, eax );         // High part * 100k
    sub( eax, ebx );            // EBX = low part

    // Step 2: first digit of each half.  429497 is 2^32 / 10_000
    // rounded up, so after the MUL EDX holds the leading digit (0..9)
    // and EAX holds the remaining four digits as a binary fraction.

    mov( 429497, eax );
    mul( ecx );
    mov( eax, ecx );            // ECX = fraction of the high half
    add( '0', dl );
    mov( 429497, eax );
    mov( dl, asciiBuf[0] );
    mul( ebx );
    mov( eax, ebx );            // EBX = fraction of the low half

    // Drop three bits (rounding up) so the fraction sits in bits 0..28;
    // this leaves headroom for the multiplications by 5 below.

    add( 7, ecx );
    shr( 3, ecx );
    add( '0', dl );
    mov( dl, asciiBuf[5] );
    add( 7, ebx );
    shr( 3, ebx );

    // Step 3: each remaining digit is fraction * 10.  Multiplying by 5
    // (LEA) and treating the binary point as one bit lower each round
    // is the same as multiplying by 10: the digit appears above bit
    // 28, 27, 26 and finally 25, and the mask keeps the new fraction.

    lea( ecx, [ecx+ecx*4] );
    mov( ecx, edx );
    and( $fff_ffff, ecx );
    shr( 28, edx );
    lea( ebx, [ebx+ebx*4] );
    add( '0', dl );
    mov( ebx, eax );
    shr( 28, eax );
    mov( dl, asciiBuf[1] );
    and( $fffffff, ebx );
    add( '0', al );
    mov( al, asciiBuf[6] );

    lea( ecx,[ecx+ecx*4] );
    lea( ebx,[ebx+ebx*4] );
    mov( ecx, edx );
    mov( ebx, eax );
    and( $7ffffff, ecx );
    shr( 27, edx );
    and( $7ffffff, ebx );
    shr( 27, eax );
    add( '0', dl );
    add( '0', al );
    mov( dl, asciiBuf[2] );
    mov( al, asciiBuf[7] );

    lea( ecx,[ecx+ecx*4] );
    lea( ebx,[ebx+ebx*4] );
    mov( ecx, edx );
    mov( ebx, eax );
    and( $3ffffff, ecx );
    shr( 26, edx );
    and( $3ffffff, ebx );
    shr( 26, eax );
    add( '0', dl );
    add( '0', al);
    mov( dl, asciiBuf[3] );
    mov( al, asciiBuf[8] );

    // Last digit of each half: no fraction needs to be kept.

    lea( ecx, [ecx+ecx*4] );
    shr( 25, ecx );
    lea( ebx, [ebx+ebx*4] );
    shr( 25, ebx );
    add( '0', cl );
    add( '0', bl );
    mov( cl, asciiBuf[4] );
    mov( bl, asciiBuf[9] );

    // Step 4: copy the last `width` digits so that the final digit
    // lands at [EDI], then move EDI back to the first digit copied.

    mov( width, ecx );
    cmp( ecx, 10 );
    ja badWidth;
    jmp( noUSjt[ecx*4] );

    noUS10:
        mov( (type dword asciiBuf[0]), ebx );
        mov( (type dword asciiBuf[4]), eax );
        mov( (type word asciiBuf[8]), cx );
        mov( ebx, [edi-9] );
        mov( eax, [edi-5] );
        mov( cx, [edi-1] );
        sub( 9, edi );
        jmp done;

    noUS9:
        mov( (type byte asciiBuf[1]), cl );
        mov( (type word asciiBuf[2]), bx );
        mov( (type dword asciiBuf[4]), eax );
        mov( (type word asciiBuf[8]), dx );
        mov( cl, [edi-8] );
        mov( bx, [edi-7] );
        mov( eax, [edi-5] );
        mov( dx, [edi-1] );
        sub( 8, edi );
        jmp done;

    noUS8:
        mov( (type dword asciiBuf[2]), ebx );
        mov( (type dword asciiBuf[6]), eax );
        mov( ebx, [edi-7] );
        mov( eax, [edi-3] );
        sub( 7, edi );
        jmp done;

    noUS7:
        mov( (type byte asciiBuf[3]), cl );
        mov( (type word asciiBuf[4]), bx );
        mov( (type dword asciiBuf[6]), eax );
        mov( cl, [edi-6] );
        mov( bx, [edi-5] );
        mov( eax, [edi-3] );
        sub( 6, edi );
        jmp done;

    noUS6:
        mov( (type word asciiBuf[4]), bx );
        mov( (type dword asciiBuf[6]), eax );
        mov( bx, [edi-5] );
        mov( eax, [edi-3] );
        sub( 5, edi );
        jmp done;

    noUS5:
        mov( (type byte asciiBuf[5]), bl );
        mov( (type dword asciiBuf[6]), eax );

        // Store exactly the one byte that was loaded.  (A 16-bit store
        // of BX here would also write BH to [edi-3] and depend on the
        // dword store below to overwrite it.)

        mov( bl, [edi-4] );
        mov( eax, [edi-3] );
        sub( 4, edi );
        jmp done;

    noUS4:
        mov( (type dword asciiBuf[6]), eax );
        mov( eax, [edi-3] );
        sub( 3, edi );
        jmp done;

    noUS3:
        mov( (type byte asciiBuf[7]), bl );
        mov( (type word asciiBuf[8]), ax );
        mov( bl, [edi-2] );
        mov( ax, [edi-1] );
        sub( 2, edi );
        jmp done;

    noUS2:
        mov( (type word asciiBuf[8]), ax );
        mov( ax, [edi-1] );
        sub( 1, edi );
        jmp done;

    noUS1:
        mov( (type byte asciiBuf[9]), al );
        mov( al, [edi] );

    done:
        pop( edx );
        pop( ecx );
        pop( ebx );
        pop( eax );
        leave();
        ret( _parms_ );

    badWidth:
        raise( ex.WidthTooBig );

end __unsToBuf32;
// Main program: for every 32-bit value, convert it with both routines,
// accumulate the RDTSC cycle count spent in each, and verify that the
// two 16-byte output buffers are identical.  On completion the two
// 64-bit totals are printed in hexadecimal (high dword first).

var
    start   :dword[2];      // RDTSC reading taken just before a call.
    time1   :dword[2];      // 64-bit cycle total for __unsToBuf32.
    time2   :dword[2];      // 64-bit cycle total for _unsToBuf32.
    buffer  :byte[16];      // Output of __unsToBuf32.
    buffer2 :byte[16];      // Output of _unsToBuf32.

begin t;

    // Fill both output buffers with spaces so that positions neither
    // routine writes compare equal.
    // NOTE(review): rep.stosb assumes the direction flag is clear on
    // entry - confirm for the target environment.

    mov( 16, ecx );
    lea( edi, buffer );
    mov( ' ', al );
    rep.stosb();

    mov( 16, ecx );
    lea( edi, buffer2 );
    mov( ' ', al );
    rep.stosb();

    // Clear the two 64-bit accumulators.

    mov( 0, time1 );
    mov( 0, time1[4] );
    mov( 0, time2 );
    mov( 0, time2[4] );

    // ECX is the value under test.  Start it at zero explicitly rather
    // than relying on rep.stosb having counted ECX down to zero.

    xor( ecx, ecx );

    loopit0:

        // Determine the digit count for this value; the result in EAX
        // is kept in EBX and passed to both routines as `width`.

        lea( edi, buffer[15] );
        mov( ecx, eax );
        conv.u32Size( ecx );
        mov( eax, ebx );

        // Time __unsToBuf32.  The measured interval also covers the
        // parameter-passing code HLA emits for the call.

        rdtsc();
        mov( eax, start );
        mov( edx, start[4] );
        __unsToBuf32( ecx, ebx, [edi] );
        rdtsc();
        sub( start, eax );
        sbb( start[4], edx );
        add( eax, time1 );
        adc( edx, time1[4] );

        // Time _unsToBuf32.  Its width parameter is passed in ECX, so
        // the loop counter is saved around the call.

        push( ecx );
        lea( edi, buffer2[15] );
        rdtsc();
        mov( eax, start );
        mov( edx, start[4] );
        _unsToBuf32( ecx, ebx, [edi] );
        rdtsc();
        sub( start, eax );
        sbb( start[4], edx );
        add( eax, time2 );
        adc( edx, time2[4] );
        pop( ecx );

        // Both routines must have produced the same 16 bytes.

        mov( (type dword buffer), eax );
        cmp( eax, (type dword buffer2) );
        jne Failure;
        mov( (type dword buffer[4]), eax );
        cmp( eax, (type dword buffer2[4]) );
        jne Failure;
        mov( (type dword buffer[8]), eax );
        cmp( eax, (type dword buffer2[8]) );
        jne Failure;
        mov( (type dword buffer[12]), eax );
        cmp( eax, (type dword buffer2[12]) );
        jne Failure;

        // Next value; the loop ends when ECX wraps around to zero,
        // i.e. after all 2^32 values have been tested.

        add( 1, ecx );
        jnz loopit0;

    stdout.put( "Time1: " );
    stdout.putd( time1[4] );
    stdout.putd( time1[0] );
    stdout.newln();

    stdout.put( "Time2: " );
    stdout.putd( time2[4] );
    stdout.putd( time2[0] );
    stdout.newln();
    exit t;

    // Reached only when the two buffers differ; ECX is the value that
    // was being converted.

    Failure:
        stdout.put( "Failed to compare at ", ecx, nl );

end t;
.
- Follow-Ups:
- Re: Hex to ascii
- From: Terje Mathisen
- Re: Hex to ascii
- References:
- Hex to ascii
- From: Displacer
- Re: Hex to ascii
- From: Robert Redelmeier
- Re: Hex to ascii
- From: Rod Pemberton
- Re: Hex to ascii
- From: Terje Mathisen
- Re: Hex to ascii
- From: Jean-François Michaud
- Re: Hex to ascii
- From: Terje Mathisen
- Re: Hex to ascii
- From: Jean-François Michaud
- Re: Hex to ascii
- From: randyhyde@xxxxxxxxxxxxx
- Re: Hex to ascii
- From: Terje Mathisen
- Re: Hex to ascii
- From: randyhyde@xxxxxxxxxxxxx
- Re: Hex to ascii
- From: Terje Mathisen
- Re: Hex to ascii
- From: randyhyde@xxxxxxxxxxxxx
- Re: Hex to ascii
- From: Terje Mathisen
- Hex to ascii
- Prev by Date: Re: Why do Intel's processors provide four debug registers rather than only one?
- Next by Date: Re: Why do Intel's processors provide four debug registers rather than only one?
- Previous by thread: Re: Hex to ascii
- Next by thread: Re: Hex to ascii
- Index(es):
Relevant Pages
|
|