Re: Hex to ascii



randyhyde@xxxxxxxxxxxxx wrote:
Terje Mathisen wrote:
Huh?
Well, if it were only three MUL instructions I'd probably be okay with
that. But it took more. And I still had accuracy problems that I had to

Aha!

You used reciprocal MUL as a simple substitution for DIV, and you probably didn't do the full error analysis needed to prove how to get exact results either?

Anyway, my algorithm is totally different: It splits the 32-bit binary number into two 5-digit (decimal) halfs, using a reciprocal multiplication by 1/100000.

The next step is to scale both of these numbers so as to make them a binary/decimal scaled fraction, with the (scaled) decimal point after the first digit.

At this point each pair of digits (one from each half) can be retrieved by masking away the top digit After storing it), multiplying the fraction by 5 (i.e. 10/2), and repeat the process, with the implied decimal point one bit further down.

But if anyone has a 32-bit integer to decimal string conversion routine
that uses MUL for the division operation, I'd love to see it. It's not
like I'm happy with the repeated subtraction approach, just that the
repeated subtraction approach is the fastest I've created so far...

Since I invented it, I'll post my original version. As I've stated previously, you can see AMD's minor tweaks to it in their optimization manual.

The version below uses 3 MUL operations and a single IMUL. The rest is mostly LEA, SUB, AND, ADD and SHR by constant.

char *utoa(char *buf, unsigned long n)
{
__asm {
mov ebx,[n]
mov eax,2814749767

mul ebx // Reciprocal (2^n / 1e5) MUL

shr ebx,1
xor ecx,ecx

mov edi,[buf]
add eax,ebx

adc ecx,edx
mov eax,100000

shr ecx,16 // ECX = high part
mov ebx,[n]

imul eax,ecx // High part * 100k

sub ebx,eax // Low part
mov eax,429497

mul ecx

mov ecx,eax
add dl,'0'

mov eax,429497
mov [edi],dl

mul ebx

mov ebx,eax
add ecx,7

shr ecx,3
add dl,'0'

mov [edi+5],dl
add ebx,7

shr ebx,3
lea ecx,[ecx+ecx*4]

mov edx,ecx
and ecx,0fffffffh

shr edx,28
lea ebx,[ebx+ebx*4]

add dl,'0'
mov eax,ebx

shr eax,28
mov [edi+1],dl

and ebx,0fffffffh
add al,'0'

mov [edi+6],al
lea ecx,[ecx+ecx*4]

lea ebx,[ebx+ebx*4]
mov edx,ecx

mov eax,ebx
and ecx,07ffffffh

shr edx,27
and ebx,07ffffffh

shr eax,27
add dl,'0'

add al,'0'
mov [edi+2],dl

mov [edi+7],al
lea ecx,[ecx+ecx*4]

lea ebx,[ebx+ebx*4]
mov edx,ecx

mov eax,ebx
and ecx,03ffffffh

shr edx,26
and ebx,03ffffffh

shr eax,26
add dl,'0'

add al,'0'
mov [edi+3],dl

mov [edi+8],al
lea ecx,[ecx+ecx*4]

shr ecx,25
lea ebx,[ebx+ebx*4]

shr ebx,25
add cl,'0'

add bl,'0'
mov [edi+10],ah

mov [edi+4],cl
mov [edi+9],bl
}

return buf;
}

Terje

--
- <Terje.Mathisen@xxxxxxxxxxxxx>
"almost all programming can be viewed as an exercise in caching"

.



Relevant Pages