; Multipath Calls

; The Multipath implementation below, same as in results2, is still faster than
; an "optimized" standard call/return that uses the carry flag instead of a
; register to indicate an error condition.
;
; Standard Baseline:
; Rate   Cycles
; -----  --------
; 1/16: 24630368
; 1/4: 17506368
; 1/2: 21004752
; 3/4: 18504752
; 15/16:18379872
;
; Multipath Test:
; Rate   Cycles
; -----  --------
; 1/16: 22130688
; 1/4: 17173600
; 1/2: 18004624
; 3/4: 18004464
; 15/16:18254672
;
; Even with a high "success" rate of 15/16 in the standard implementation,
; the multipath manages to perform slightly better, as it does not need
; to execute the 'stc' and 'jc' operations.
;
;
[BITS 32]

; ---------------------------------------------------------------------------
; Multiboot (version 1) header.
;
; The header is three dwords: magic, flags and a checksum chosen so that
; magic + flags + checksum == 0 (mod 2^32).  It is emitted as the very first
; thing in .text so that it sits at the start of the code section.
; ---------------------------------------------------------------------------
MULTIBOOT_PAGE_ALIGN    equ 1<<0        ; flag bit 0
MULTIBOOT_MEMORY_INFO   equ 1<<1        ; flag bit 1
MULTIBOOT_HEADER_MAGIC  equ 0x1BADB002
MULTIBOOT_HEADER_FLAGS  equ MULTIBOOT_PAGE_ALIGN|MULTIBOOT_MEMORY_INFO
MULTIBOOT_CHECKSUM      equ -(MULTIBOOT_HEADER_MAGIC + MULTIBOOT_HEADER_FLAGS)

section .text
        align 4                         ; the header must be 32-bit aligned
        dd MULTIBOOT_HEADER_MAGIC
        dd MULTIBOOT_HEADER_FLAGS
        dd MULTIBOOT_CHECKSUM

; ---------------------------------------------------------------------------
; start -- entry point and benchmark driver.
;
; Sequence:
;   1. Install a private stack, clear the VGA text screen.
;   2. Run ten timed steps (standard vs. multipath return at the rates
;      1/16, 1/4, 1/2, 3/4 and 15/16).  Each step prints its label, stores its
;      64-bit cycle count in result_stepN and prints counter0/counter1
;      right-aligned on the same text line.
;   3. Wait for Escape, then overwrite the counters on each line with the
;      stored cycle counts (high dword, gap, low dword).
;   4. Wait for Escape again and execute UD2.
;
; Register roles while the steps run:
;   EDI = VGA text cursor; at the start of a line between two steps
;   EBP = bytes per VGA text line, as listed in do_test's inputs
; ---------------------------------------------------------------------------

VGA_TEXT_BASE       equ 0xB8000
VGA_COLUMNS         equ 80
VGA_ROWS            equ 25
VGA_LINE_BYTES      equ VGA_COLUMNS*2                       ; char + attribute per cell
RESULT_FIELD_BYTES  equ (16*2)+4                            ; two 8-digit hex fields + 2-cell gap
RESULT_COLUMN_SKIP  equ VGA_LINE_BYTES - RESULT_FIELD_BYTES ; right-aligns the field
BOOT_STACK_BYTES    equ 4096

; RUN_STEP label, numerator, denominator, routine, result_slot
;   Runs one benchmark step through do_test, stores EDX:EAX (cycles) in the
;   8-byte result slot and prints the two hit counters.
;   Scrap(EAX,EBX,ECX,EDX,ESI)Mod(EDI=+160)
%macro RUN_STEP 5
        lea     eax, [%1]
        mov     ecx, %{1}.length
        mov     ebx, %2
        mov     edx, %3
        mov     esi, %4
        call    do_test
        mov     [%5], eax
        mov     [%5+4], edx
        call    display_counters        ; Scrap(EAX,EBX,ECX,EDX)Mod(EDI=+160)
%endmacro

; SHOW_RESULT result_slot
;   Expects EDI at the start of a text line; prints the 64-bit value in the
;   slot right-aligned on that line and leaves EDI at the start of the next.
;   Scrap(EAX,EBX,ECX,EDX)Mod(EDI=+160)
%macro SHOW_RESULT 1
        add     edi, RESULT_COLUMN_SKIP
        mov     eax, [%1+4]             ; high dword first
        call    display_hex
        add     edi, byte 4             ; 2-cell gap between the halves
        mov     eax, [%1]
        call    display_hex
%endmacro

global start
start:
        ; The Multiboot specification leaves ESP undefined at entry, and
        ; everything below relies on call/push, so install our own stack.
        mov     esp, boot_stack_top
        cld                             ; string instructions must count upwards

        ; clear screen
        mov     edi, VGA_TEXT_BASE
        mov     ecx, VGA_COLUMNS*VGA_ROWS*2/4
        xor     eax, eax
        rep stosd

        mov     ebp, VGA_LINE_BYTES
        mov     edi, VGA_TEXT_BASE

        ; steps 1-10: standard baseline and multipath test at each rate
        RUN_STEP str_step1,  1,  16, standard_baseline, result_step1
        RUN_STEP str_step2,  1,  16, multipath_test,    result_step2
        RUN_STEP str_step3,  1,  4,  standard_baseline, result_step3
        RUN_STEP str_step4,  1,  4,  multipath_test,    result_step4
        RUN_STEP str_step5,  1,  2,  standard_baseline, result_step5
        RUN_STEP str_step6,  1,  2,  multipath_test,    result_step6
        RUN_STEP str_step7,  3,  4,  standard_baseline, result_step7
        RUN_STEP str_step8,  3,  4,  multipath_test,    result_step8
        RUN_STEP str_step9,  15, 16, standard_baseline, result_step9
        RUN_STEP str_step10, 15, 16, multipath_test,    result_step10

        lea     eax, [str_done]
        mov     ecx, str_done.length
        call    display_string          ; Scrap(ESI,EAX)Mod(EDI,ECX=0)

        call    wait_for_escape

        ; Replace the counters on lines 0..9 with the measured cycle counts.
        mov     edi, VGA_TEXT_BASE
        SHOW_RESULT result_step1
        SHOW_RESULT result_step2
        SHOW_RESULT result_step3
        SHOW_RESULT result_step4
        SHOW_RESULT result_step5
        SHOW_RESULT result_step6
        SHOW_RESULT result_step7
        SHOW_RESULT result_step8
        SHOW_RESULT result_step9
        SHOW_RESULT result_step10

        ; EDI is now at the start of line 10, where str_done was printed.
        lea     eax, [str_reset]
        mov     ecx, str_reset.length
        call    display_string          ; Scrap(ESI,EAX)Mod(EDI,ECX=0)

        call    wait_for_escape

        ; No IDT is installed in this file, so the invalid-opcode exception is
        ; presumably meant to escalate to a triple fault and reset the machine,
        ; as the on-screen message says -- TODO confirm on the target.
        ud2
        jmp     short $                 ; hang if execution ever gets here

section .bss
        alignb 4
boot_stack:     resb BOOT_STACK_BYTES
boot_stack_top:                         ; stack grows down from here

section .text

do_test:
; Print a step label, zero the hit counters and time one run of a test
; routine with RDTSC.
;
; In:
;   EAX = address of the label text for this step
;   ECX = length of that text
;   EBX = return-rate numerator   (rate is EBX/EDX), handed to the routine
;   EDX = return-rate denominator (rate is EBX/EDX), handed to the routine
;   EDI = screen location where the label is written
;   ESI = address of the test routine to call
;   EBP = distance in bytes between two VGA text lines (160)
;
; Out:
;   EDX:EAX = elapsed time-stamp-counter ticks around the routine call
;             (the pushad/popad guard is inside the measured window)
;   ECX     = 0 (left that way by display_string)
;   EBX, ESI and EDI are returned unchanged.

        push    edi                     ; the caller's cursor survives the label print

        push    esi                     ; display_string uses ESI as its source pointer
        call    display_string          ; Need(EAX,ECX,EDI)Scrap(ESI,EAX)Mod(EDI,ECX=0)
        call    reset_counters          ; Need()Scrap(EAX)
        pop     esi

        push    edx                     ; rdtsc overwrites EDX, which the routine still needs
        rdtsc
        mov     [rdtsc_counters], eax
        mov     [rdtsc_counters+4], edx
        pop     edx

        pushad                          ; guard: the routine may clobber any register
        call    esi
        popad

        rdtsc
        sub     eax, [rdtsc_counters]   ; 64-bit subtraction: end - start
        sbb     edx, [rdtsc_counters+4]

        pop     edi
        ret

display_hex:
; Write EAX as eight uppercase hexadecimal digits into VGA text memory,
; most significant digit first.
;
; In:
;   EAX = value to display
;   EDI = location of the first character cell
; Out:
;   EDI = advanced by 16 bytes (8 cells of character + attribute)
;   EAX = unchanged (eight 4-bit rotations bring it back to its start value)
; Scrap(ECX,EBX,EDX)Mod(EDI)
        mov     cl, 8                   ; digits left to print
        xor     ebx, ebx                ; EBX doubles as a zero-extended table index
.processing:
        rol     eax, 4                  ; next most-significant nibble into AL[3:0]
        mov     bl, al
        and     bl, 0x0F
        mov     dl, byte [ebx + .hex_digits]
        mov     [edi], dl               ; character byte
        mov     [edi + 1], byte 0x07    ; attribute byte, same 0x07 as display_string
        add     edi, byte 2
        dec     cl
        jnz     .processing
        retn

.hex_digits:    db '0123456789ABCDEF'

display_string:
; Copy a byte string into VGA text memory, pairing every character with the
; attribute byte 0x07.
;
; In:
;   EAX = address of the string
;   ECX = length of the string in bytes (0 is allowed and prints nothing)
;   EDI = location of the first character cell
; Out:
;   EDI = advanced by 2 bytes per character written
;   ECX = 0
; Relies on the direction flag being clear (start executes cld).
; Scrap(ESI,EAX)Mod(EDI,ECX=0)
        jecxz   .done                   ; without this, ECX = 0 would make LOOP run 2^32 times
        mov     esi, eax
        mov     ah, 0x07                ; attribute goes in the high byte of each stored word
.copying_string:
        lodsb                           ; AL = next character
        stosw                           ; store character + attribute
        loop    .copying_string
.done:
        retn

wait_for_escape:
; Busy-wait until the byte read from I/O port 0x60 (keyboard data port) is
; 0x01 and then until it is 0x81, i.e. the scan-code set 1 make and break
; codes of the Escape key: the key has to be pressed and released.
;
; Only AL is written; the routine is documented as scrapping EAX.
; Scrap(EAX)
.wait_escape_down:
        in      al, 0x60
        cmp     al, 0x01                ; Escape pressed?
        jnz     .wait_escape_down
.wait_escape_release:
        in      al, 0x60
        cmp     al, 0x81                ; Escape released?
        jnz     .wait_escape_release
        retn

reset_counters:
; Zero both hit counters (counter0 and counter1) before a benchmark run.
; EAX is 0 on return.
; Scrap(EAX)
        xor     eax, eax
        mov     [counter1], eax
        mov     [counter0], eax
        ret

display_counters:
; Print counter0 and counter1 as two 8-digit hex numbers, separated by a
; two-cell gap, right-aligned on the text line that starts at EDI.
;
; In:
;   EDI = start of the text line
; Out:
;   EDI = start of the next text line (124 + 16 + 4 + 16 = 160 bytes further)
; Scrap(EAX,ECX,EBX,EDX)Mod(EDI=+160)
        add     edi, dword (160 - ((16*2) + 4)) ; skip to the right-aligned field
        mov     eax, [counter0]
        call    display_hex             ; Scrap(ECX,EBX,EDX)Mod(EDI=+16)
        add     edi, byte 4             ; two blank cells between the numbers
        mov     eax, [counter1]
        call    display_hex             ; Scrap(ECX,EBX,EDX)Mod(EDI=+16)
        ret

; Pad with zero bytes up to the next 4096-byte boundary in this section so the
; routine starts at a fixed alignment.
align 4096, db 0
standard_baseline:
; Timed kernel, conventional variant: the helper reports its outcome in the
; carry flag and the caller branches on it with jc.
;
; In:
;   EBX = rate numerator, EDX = rate denominator (rate EBX/EDX)
; Effect:
;   Calls below_or_not_standard 1,000,000 times with EAX cycling 1..EDX.
;   counter0 is incremented when the helper returns with carry clear,
;   counter1 when it returns with carry set.
; All general registers are preserved (pushad/popad).
pushad
mov ecx, 1000000
.reset:
; restart the 1..EDX cycle; this does not consume a loop iteration
xor eax, eax
.processing:
inc eax
cmp eax, edx
ja .reset
call below_or_not_standard
jc .is_not_below
; carry clear: EAX was not above EBX
inc dword [counter0]
loop .processing
popad
retn
.is_not_below:
; carry set: EAX was above EBX
inc dword [counter1]
loop .processing
popad
retn

; Pad with zero bytes up to the next 4096-byte boundary in this section.
align 4096, db 0
below_or_not_standard:
; Helper for standard_baseline: unsigned comparison of EAX against EBX,
; reported through the carry flag.
;
; In:  EAX = value, EBX = threshold
; Out: CF = 0 if EAX <= EBX, CF = 1 if EAX > EBX
cmp eax, ebx
ja .not_below
; cmp leaves CF set when EAX < EBX, so it has to be cleared explicitly
clc
retn
.not_below:
; ja is only taken with CF = 0, so the flag has to be set explicitly
stc
retn

; Pad with zero bytes up to the next 4096-byte boundary in this section.
align 4096, db 0
multipath_test:
; Timed kernel, multipath variant: the helper either returns normally (to
; .is_below) or jumps to an alternate continuation whose address is stored
; in a small table placed directly in front of the call instruction.
;
; In:
;   EBX = rate numerator, EDX = rate denominator (rate EBX/EDX)
; Effect:
;   Calls below_or_not_multipath 1,000,000 times with EAX cycling 1..EDX.
;   counter0 is incremented on the normal return path, counter1 on the
;   alternate path.
; Unlike standard_baseline, nothing is saved here: EAX and ECX are modified,
; and EDI is overwritten by the helper on the alternate path.  The caller
; (do_test) wraps the call in pushad/popad.
mov ecx, 1000000
.reset:
; restart the 1..EDX cycle; this does not consume a loop iteration
xor eax, eax
.processing:
inc eax
cmp eax, edx
ja .reset
; hop over the inline table; this jump executes on every iteration
jmp .do_call
; Inline continuation table.  The helper reads the dword at
; (return address - 9): 5 bytes of near call rel32 plus the 4 bytes of
; "dd .is_not".  The ".is_below" entry sits at (return address - 13) and is
; not read by any code visible in this file.
dd .is_below
dd .is_not
.do_call:
call below_or_not_multipath
.is_below:
; normal return: EAX was not above EBX
inc dword [counter0]
loop .processing
retn
.is_not:
; alternate continuation: the helper has already popped the return address,
; so the stack is balanced here
inc dword [counter1]
loop .processing
retn

; Pad with zero bytes up to the next 4096-byte boundary in this section.
align 4096, db 0
below_or_not_multipath:
; Helper for multipath_test: unsigned comparison of EAX against EBX,
; reported by WHERE control continues rather than by a flag.
;
; In:  EAX = value, EBX = threshold
; Out: EAX <= EBX: ordinary return to the instruction after the call.
;      EAX >  EBX: the return address is popped into EDI (clobbering it) and
;                  control transfers through the dword stored 9 bytes before
;                  that return address.
; The call site must therefore place a dword continuation address immediately
; before a 5-byte call instruction, as multipath_test does.
cmp eax, ebx
ja .not_below
retn
.not_below:
; discard the return address, keeping it in EDI as the table base
pop edi
; indirect jump through the caller's inline table entry
jmp [edi-9]

section .data
; Step labels and prompts.  Each string has a .start label and a .length
; constant (its byte count); the text is not NUL-terminated.
str_step1:
.start:         db "Standard Return 1/16:"
.length         equ $ - .start
str_step2:
.start:         db "Multipath Return 1/16:"
.length         equ $ - .start
str_step3:
.start:         db "Standard Return 1/4:"
.length         equ $ - .start
str_step4:
.start:         db "Multipath Return 1/4:"
.length         equ $ - .start
str_step5:
.start:         db "Standard Return 1/2:"
.length         equ $ - .start
str_step6:
.start:         db "Multipath Return 1/2:"
.length         equ $ - .start
str_step7:
.start:         db "Standard Return 3/4:"
.length         equ $ - .start
str_step8:
.start:         db "Multipath Return 3/4:"
.length         equ $ - .start
str_step9:
.start:         db "Standard Return 15/16:"
.length         equ $ - .start
str_step10:
.start:         db "Multipath Return 15/16:"
.length         equ $ - .start

str_done:
.start:         db "Computations done. Press Escape when ready to view the results"
.length         equ $ - .start

str_reset:
.start:         db "Results displayed. Press Escape when ready to restart the computer"
.length         equ $ - .start

section .bss

; Hit counters updated by the test routines: counter0 on the "not above"
; path, counter1 on the "above" path.  Zeroed by reset_counters.
counter0:       resd 1
counter1:       resd 1

; Time-stamp counter captured by do_test before the routine runs
; (low dword at +0, high dword at +4).
rdtsc_counters: resq 1

; Elapsed cycle count per step, stored as EAX at +0 and EDX at +4.
result_step1:   resq 1
result_step2:   resq 1
result_step3:   resq 1
result_step4:   resq 1
result_step5:   resq 1
result_step6:   resq 1
result_step7:   resq 1
result_step8:   resq 1
result_step9:   resq 1
result_step10:  resq 1