Skip to content

Instantly share code, notes, and snippets.

@jgamble
Created January 27, 2023 04:34
Show Gist options
  • Select an option

  • Save jgamble/2dc152d2096a67f809f3cec2e603317b to your computer and use it in GitHub Desktop.

Select an option

Save jgamble/2dc152d2096a67f809f3cec2e603317b to your computer and use it in GitHub Desktop.
Like hercdot.asm, write pixels onto the screen with the Hercules graphics card, but a byte's worth at a time instead of pixel by pixel, which was a significant speed-up on 8086 computers.
;
; Hercbyt -- write out a byte's worth of dots in the correct position
; for the Hercules graphics board. This is a variation on hercdot,
; which writes out only a dot at a time.
;
; Invocation: hercbyt(the_byte, y, x)
; the_byte = {0..255}
; y = {0..347}
; x = {0..712}, a multiple of 8, to stay on a byte boundary.
;
; In order to use the hercules graphics board, you have to
; calculate the offset from page 0 (= B0000). The formula is:
;
; (2000h * (Y MOD 4)) + (90 * INT(Y/4)) + INT(X/8)
;
;
; The first term is equivalent to (Y & 3) << 13.
;
; The second term has to be found by faking multiplication with shifts and
; adds (imul is too slow) using Y >> 2.
;
; The third term is equivalent to X >> 3.
;
; Written by John M. Gamble April 1986
;
include ctoasm.mac
page0 equ 0b000h
pseg
public hercbyt
;-----------------------------------------------------------------------
; hercbyt(the_byte, y, x) -- store one byte (eight horizontal pixels)
; into Hercules graphics page 0.
;
; Stack on entry (far call; this routine pushes nothing):
;       [sp+0]  return offset
;       [sp+2]  return segment
;       [sp+4]  the_byte  (only the low byte is stored)
;       [sp+6]  y
;       [sp+8]  x
;
; Video offset = ((y AND 3) SHL 13) + 90 * (y SHR 2) + (x SHR 3)
;
; Register roles:
;       di = caller's bp   (parked in a register instead of pushed)
;       si = caller's es   (likewise)
;       dx = bank term, then bank + row, finally the page segment
;       cx = row term
;       ax = 2 * (y SHR 2) addend, later the byte to write
;       bx = column term, then the finished offset
;
; Destroys ax, bx, cx, dx, si, di and the flags; bp and es are restored.
; NOTE(review): si and di are overwritten without being preserved --
; confirm the calling compiler does not expect them to survive a call.
;-----------------------------------------------------------------------
hb_byte         equ     4               ; stack offset of the_byte
hb_y            equ     6               ; stack offset of y
hb_x            equ     8               ; stack offset of x
hb_bankmask     equ     1800h           ; y bits 0-1 after the first 3 shifts

hercbyt proc    far
;
; Entry. bp and es are saved with register-to-register moves rather
; than push/pop (the original author's speed choice). Since nothing is
; pushed, the arguments begin right after the 4-byte far return address.
;
        mov     di, bp                  ; di = caller's bp
        mov     bp, sp                  ; bp -> return address
        mov     si, es                  ; si = caller's es
        mov     cx, [bp + hb_y]         ; cx = y
;
; Bank term: (y AND 3) SHL 13.
; Copying the low byte of y into dh is a free 8-bit left shift, which
; leaves five single-bit shifts to do. They are written out one by one
; instead of using a cl-counted shift, and the masking AND is placed
; part-way through the run on purpose: the original author's note is
; that a long run of fast shifts drains the prefetch queue, and the
; AND gives it time to refill. Keep this order.
;
        mov     dh, cl                  ; y bits 0-1 -> dx bits 8-9
        shl     dx, 1
        shl     dx, 1
        shl     dx, 1                   ; y bits 0-1 -> dx bits 11-12
        and     dx, hb_bankmask         ; drop the rest (incl. stale dl)
        shl     dx, 1
        shl     dx, 1                   ; dx = (y AND 3) SHL 13
;
; Row term: 90 * q, where q = y SHR 2.
; One right shift followed by clearing bit 0 yields 2q directly; the
; multiply by 90 is then built from shifts and adds of that 2q value
; (the original author preferred this to a multiply or a table).
;
        shr     cx, 1                   ; cx = y SHR 1
        and     cl, 0feh                ; clear bit 0: cx = 2q
        mov     ax, cx                  ; ax = 2q, the repeated addend
        shl     cx, 1                   ;  4q
        shl     cx, 1                   ;  8q
        add     cx, ax                  ; 10q
        shl     cx, 1                   ; 20q
        add     cx, ax                  ; 22q
        shl     cx, 1                   ; 44q
        shl     cx, 1                   ; 88q
        add     cx, ax                  ; 90q
;
        add     dx, cx                  ; dx = bank term + row term
;
; Column term: x SHR 3 (eight pixels per byte).
;
        mov     bx, [bp + hb_x]         ; bx = x
        shr     bx, 1
        shr     bx, 1
        shr     bx, 1                   ; bx = x SHR 3
        add     bx, dx                  ; bx = finished video offset
;
; Store the byte at page0:bx. The segment value goes through dx
; before being moved into es.
;
        mov     dx, page0
        mov     es, dx                  ; es = page 0 segment
        mov     ax, [bp + hb_byte]      ; al = the_byte
        mov     es:[bx], al             ; write the eight pixels
;
; Exit: put back the caller's es and bp from their holding registers.
; ret has no operand, so the arguments stay on the stack for the caller.
;
        mov     es, si
        mov     bp, di
        ret
hercbyt endp
endps
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment