Created
January 27, 2023 04:34
-
-
Save jgamble/2dc152d2096a67f809f3cec2e603317b to your computer and use it in GitHub Desktop.
Like hercdot.asm, write pixels onto the screen with the Hercules graphics card, but a byte's worth at a time instead of pixel by pixel, which was a significant speed-up on 8086 computers.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| ; | |
| ; Hercbyt -- write out a byte's worth of dots in the correct position | |
| ; for the Hercules graphics board. This is a variation on hercdot, | |
| ; which writes out only a dot at a time. | |
| ; | |
| ; Invocation: hercbyt(the_byte, y, x) | |
| ; the_byte = {0..255} | |
| ; y = {0..347} | |
| ; x = {0..712}, a multiple of 8, to stay on a byte boundary. | |
| ; | |
| ; In order to use the hercules graphics board, you have to | |
| ; calculate the offset from page 0 (= B0000). The formula is: | |
| ; | |
| ; (2000h * (Y MOD 4)) + (90 * INT(Y/4)) + INT(X/8) | |
| ; | |
| ; | |
| ; The first term is equivalent to (Y & 3) << 13. | |
| ; | |
| ; The second term has to be found by faking multiplication with shifts and | |
| ; adds (imul is too slow) using Y >> 2. | |
| ; | |
| ; The third term is equivalent to X >> 3. | |
| ; | |
| ; Written by John M. Gamble April 1986 | |
| ; | |
| include ctoasm.mac | |
| ; page0 is a SEGMENT value: hercbyt loads it into es, so the byte | |
| ; offsets it computes are relative to physical address B0000h. | |
| page0 equ 0b000h | |
| ; NOTE(review): pseg is not defined in this file -- presumably a | |
| ; macro from ctoasm.mac that opens the code segment; confirm. | |
| pseg | |
| public hercbyt | |
| hercbyt proc far | |
| ; | |
| ; Stores one byte (8 horizontal dots) into the display buffer at the | |
| ; offset computed from y and x. | |
| ; | |
| ; Stack as read below (far return address on top, bp NOT pushed): | |
| ; [sp + 4] = the_byte, [sp + 6] = y, [sp + 8] = x | |
| ; | |
| ; Registers: ax, bx, cx, dx, si, di and the flags are overwritten. | |
| ; bp and es are parked in di and si and restored before returning. | |
| ; NOTE(review): si and di themselves are not restored -- confirm that | |
| ; the calling compiler treats them as scratch registers. | |
| ; No range checking is performed on y or x. | |
| ; | |
| mov di, bp ; Most functions would 'push bp' here, | |
| ; but di is otherwise unused in this | |
| ; routine, and the register move | |
| ; takes 3 cycles vs. the eleven | |
| ; cycles of a push operation. | |
| ; Since nothing is pushed, every argument | |
| ; offset is 2 less than in a push-bp frame. | |
| ; | |
| mov bp, sp ; bp addresses the arguments on the stack. | |
| mov si, es ; park es in si instead of pushing it. | |
| mov cx, [bp + 6] ; cx = y | |
| ; | |
| ; First term: (y & 3) * 2000h, i.e. the low two bits of y shifted | |
| ; left 13 places. Copying the low byte of y into the high byte of dx | |
| ; already counts as an 8 bit shift, so only 5 single shifts remain. | |
| ; The shifts are written out explicitly, taking 10 cycles, instead of | |
| ; the 22 cycles it takes with the move and shl <reg> cl instructions. | |
| ; The 'and' that isolates the two bits is placed in the middle of the | |
| ; shift sequence on purpose: shift instructions are very fast and | |
| ; empty the instruction pipe quickly, so cpu time is wasted refilling | |
| ; it. The slower 'and' gives the pipe a break. | |
| ; dl is never initialized here; whatever it holds is shifted along | |
| ; with dh and then discarded by the mask. | |
| ; | |
| mov dh, cl ; y bits 0-1 are now dx bits 8-9. | |
| shl dx, 1 | |
| shl dx, 1 | |
| shl dx, 1 | |
| and dx, 1800h ; keep only y bits 0-1 (now at dx bits 11-12). | |
| shl dx, 1 | |
| shl dx, 1 ; dx = (y & 3) << 13, the first term. | |
| ; | |
| ; Second term: 90 * INT(y/4). Start from y >> 1 with bit 0 cleared, | |
| ; which equals 2 * INT(y/4) (faster than loading cl for a counted | |
| ; shift or div'ing by 4), then 'multiply' using shifts and adds. | |
| ; This saves 4 clock cycles over a data table lookup on an 8088 and | |
| ; might save the same on an 8086 if the table wound up on an odd | |
| ; address. Either way, the table space is saved. | |
| ; Below, q stands for INT(y/4). | |
| ; | |
| shr cx, 1 | |
| and cl, 0feh ; clear bit 0: cx = 2q. The 'and' is | |
| ; equivalent to a shift right/shift | |
| ; left pair; it takes the same time | |
| ; but one less byte of space. | |
| ; | |
| mov ax, cx ; ax = 2q, the addend reused below. | |
| shl cx, 1 ; cx = 4q | |
| shl cx, 1 ; cx = 8q | |
| add cx, ax ; cx = 10q | |
| shl cx, 1 ; cx = 20q | |
| add cx, ax ; cx = 22q | |
| shl cx, 1 ; cx = 44q | |
| shl cx, 1 ; cx = 88q | |
| add cx, ax ; cx = 90q, the second term. | |
| ; | |
| add dx, cx ; dx = first term + second term. | |
| ; | |
| ; Third term: INT(x/8), the byte column within the scan line. | |
| ; | |
| mov bx, [bp + 8] | |
| shr bx, 1 ; three single shifts = 'divide' by 8. | |
| shr bx, 1 | |
| shr bx, 1 | |
| add bx, dx ; bx = complete offset into the page. | |
| ; | |
| mov dx, page0 ; es cannot be loaded with an immediate, | |
| mov es, dx | |
| mov ax, [bp + 4] ; ax = the_byte argument (only al is stored). | |
| mov es:[bx], al ; write the byte into the display buffer. | |
| ; | |
| ; Restore the registers parked at entry and return. | |
| ; | |
| mov es, si ; es back from si. | |
| mov bp, di ; bp back from di. | |
| ret | |
| hercbyt endp | |
| ; NOTE(review): endps is not defined in this file -- presumably the | |
| ; ctoasm.mac macro that closes the segment opened by pseg; confirm. | |
| endps | |
| end | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment