Created
April 28, 2026 19:42
-
-
Save JJTech0130/3ebdf3b36356f639bd51c8c02dbba342 to your computer and use it in GitHub Desktop.
jjgRISC assembler for Digital Systems II at RIT
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| """ | |
| jjgRISC assembler | |
| Assembles fmlRISC assembly (.s) to Altera .mif format. | |
| Based on dxp_asm, which is written in C, aimed to be implemented in a cleaner way. | |
| I added support for the .equ and .word directives; .word is not very useful for SMIOP because you can't load PM into registers. | |
| I also fixed the jump implementation: a first pass is needed to calculate the address of each label before a jump to it can be encoded. | |
| I implemented support for all mnemonics for all IDNs, so hopefully it should be usable for everyone. | |
| Usage: python3 jjg_asm.py <source.s> [output.mif] | |
| Instruction formats (all 8-bit words): | |
| Type A ADD SUB XOR AND OR CPY : opcode[4] Rsd[2] Rs2[2] | |
| Type B INC DEC SHRA SHLL RRC IN OUT : opcode[4] Rsd[2] K[2] | |
| Type C LD ST : IW0 = opcode[4] Rsd[2] Ra[2], IW1 = offset[8] | |
| Type D JUMP : IW0 = 1101 cond[4], IW1 = (target - IW1_addr)[8] | |
| """ | |
| import sys | |
| import re | |
# Mnemonic table: name -> (instruction type, 4-bit opcode).
# Several mnemonics deliberately share an opcode; the aliases are the
# spellings used by other IDNs.
MNEMONICS = {
    "ADD": ("A", 0x0),
    "SUB": ("A", 0x1),
    "INC": ("B", 0x2),
    "DEC": ("B", 0x3),
    "XOR": ("A", 0x4),
    "AND": ("A", 0x5),
    "OR": ("A", 0x6),
    "CPY": ("A", 0x7),
    "SHRA": ("B", 0x8),
    "SHRL": ("B", 0x9),  # other IDN
    "SHLL": ("B", 0x9),
    "RRC": ("B", 0xA),
    "RLC": ("B", 0xA),  # other IDN
    "LD": ("C", 0xB),
    "ST": ("C", 0xC),
    "JUMP": ("D", 0xD),
    "IN": ("B", 0xE),  # other IDN
    "POP": ("B", 0xE),  # other IDN
    "OUT": ("B", 0xF),
    "PUSH": ("B", 0xF),
}

# Register name -> 2-bit register number (R0..R3).
REGS = {f"R{number}": number for number in range(4)}

# JUMP condition field encodings: one bit per flag, all zero for 'U'.
JUMP_CONDS = {
    "U": 0,
    "C": 1 << 3,
    "N": 1 << 2,
    "V": 1 << 1,
    "Z": 1 << 0,
}
def parse_reg(token):
    """Return the register number for a register name such as ``R2``.

    The name is upper-cased before the lookup in REGS, so ``r2`` and ``R2``
    are equivalent.

    Raises:
        ValueError: if the upper-cased name is not a key of REGS.
    """
    try:
        return REGS[token.upper()]
    except KeyError:
        # Report the token exactly as written in the source.
        raise ValueError(f"Unknown register: {token!r}") from None
def parse_const(token):
    """Convert a numeric literal string to an int.

    The conversion is delegated to ``int()`` with base 0, which picks the
    radix from the literal's prefix (``0x`` hex, ``0b`` binary, ``0o`` octal,
    otherwise decimal).
    """
    value = int(token, base=0)
    return value
def parse_operands(tokens):
    """Strip operand punctuation from whitespace-split operand tokens.

    A leading ``M[`` (any case) is removed, then any trailing ``,`` or ``]``
    characters are removed. For example ``['R0,', 'M[R1,', '0x10]']`` becomes
    ``['R0', 'R1', '0x10']``.
    """
    return [
        (tok[2:] if tok.upper().startswith('M[') else tok).rstrip(',]')
        for tok in tokens
    ]
def int_to_binstr(val, bits):
    """Format ``val`` as a zero-padded binary string of ``bits`` digits.

    The value is masked to ``bits`` bits first, so negative numbers come out
    in two's complement and oversized values are truncated:

        int_to_binstr(0b10110011, 8) -> "10110011"
        int_to_binstr(-1, 8)         -> "11111111"
    """
    mask = (1 << bits) - 1
    return f"{val & mask:0{bits}b}"
def parse_source(source):
    """Parse assembly source text into instruction records and .word data.

    Sections (``.directives``, ``.constants``, ``.code`` and their ``.end*``
    counterparts) may appear multiple times and in any order; the parser
    tracks which section the current line belongs to.

    * In a directives section, ``.equ NAME VALUE`` defines a textual
      substitution that is applied to every later code line.
    * In a constants section, ``.word NAME [INIT]`` declares a data byte;
      INIT defaults to ``'0x00'``.
    * In a code section, each non-empty line becomes one record. A first
      token starting with ``@`` is taken as the line's label.

    Everything after the first ``;`` on a line is dropped, so ``;`` works both
    as an instruction terminator and as a comment introducer.

    A label on a line by itself is attached to the next instruction. A record
    holds a single label, so if that instruction has a label of its own (or
    several label-only lines precede it), only the closest label is kept.

    Args:
        source: the full text of the assembly file.

    Returns:
        A ``(records, words)`` tuple. ``records`` is a list of
        ``(label_or_None, MNEMONIC, operand_tokens, line_number)`` with the
        mnemonic upper-cased and line numbers starting at 1. ``words`` is a
        list of ``(name, init_value_string)`` in declaration order.
    """
    # Section marker -> section that is active after the marker line.
    section_markers = {
        '.directives': 'directives',
        '.enddirectives': None,
        '.constants': 'constants',
        '.endconstants': None,
        '.code': 'code',
        '.endcode': None,
    }

    records = []
    equates = {}  # .equ name -> value string, substituted into code lines
    words = []  # .word (name, init_value) in declaration order
    section = None
    pending_label = None  # label seen on a line with no instruction yet

    for lineno, line in enumerate(source.splitlines(), 1):
        # Strip the comment / instruction terminator.
        line = line.split(';')[0]

        # Substitute .equ names before tokenizing code lines.
        if section == 'code':
            for name, val in equates.items():
                # \b keeps COUNT from matching inside DISCOUNT. The callable
                # replacement inserts the value verbatim; a plain string would
                # be treated as a template and backslashes in it as escapes.
                line = re.sub(
                    r'\b' + re.escape(name) + r'\b',
                    lambda _match, _value=val: _value,
                    line,
                )

        tokens = line.split()
        if not tokens:
            continue
        first = tokens[0]

        if first in section_markers:
            section = section_markers[first]
            continue

        if section == 'directives' and first == '.equ' and len(tokens) >= 3:
            equates[tokens[1]] = tokens[2]
            continue
        if section == 'constants' and first == '.word' and len(tokens) >= 2:
            words.append((tokens[1], tokens[2] if len(tokens) >= 3 else '0x00'))
            continue
        if section != 'code':
            continue

        label = None
        if first.startswith('@'):
            label = first
            tokens = tokens[1:]
            if not tokens:
                # Label-only line: remember it for the next instruction
                # instead of silently discarding it.
                pending_label = label
                continue
            first = tokens[0]

        if label is None:
            label = pending_label
        pending_label = None
        records.append((label, first.upper(), tokens[1:], lineno))

    return records, words
def assemble(records, words=()):
    """Assemble parsed records into a list of annotated memory bytes.

    Two passes are used so that JUMP can reference labels defined later in
    the source: pass 1 computes the address of every label and .word, pass 2
    emits the instruction bytes. The original assembly text is kept as a
    per-byte comment to make the generated .mif easier to debug.

    Args:
        records: ``(label_or_None, mnemonic, operand_tokens, line_number)``
            tuples as produced by ``parse_source``.
        words: ``(name, init_value_string)`` pairs; each occupies one byte
            placed directly after the code, in declaration order.

    Returns:
        ``(output, final_addr)`` where ``output`` is a list of
        ``(addr, byte_val, comment)`` and ``final_addr`` is the first address
        after everything emitted (code and .word data).

    Raises:
        ValueError: for an unknown mnemonic, too few operands, an invalid
            jump condition, an undefined label, or an operand that
            ``parse_reg`` / ``parse_const`` rejects.
    """
    # Pass 1: determine label addresses.
    labels = {}
    addr = 0
    for label, mnem, _, lineno in records:
        if label is not None:
            labels[label] = addr
        if mnem not in MNEMONICS:
            raise ValueError(f"Line {lineno}: Unknown mnemonic '{mnem}'")
        itype, _ = MNEMONICS[mnem]
        # Types C and D are encoded as two bytes, A and B as one.
        addr += 2 if itype in ('C', 'D') else 1

    # .word addresses follow the code sequentially.
    word_addrs = {}
    for name, _init in words:
        word_addrs[name] = addr
        addr += 1

    # Minimum operand count per instruction type; extra operands are ignored.
    required_operands = {'A': 2, 'B': 2, 'C': 3, 'D': 2}

    # Pass 2: emit the instruction bytes.
    output = []
    addr = 0
    for _label, mnem, operand_tokens, lineno in records:
        itype, opcode = MNEMONICS[mnem]
        ops = parse_operands(operand_tokens)
        needed = required_operands[itype]
        if len(ops) < needed:
            # Report the source line instead of failing with a bare IndexError.
            raise ValueError(
                f"Line {lineno}: '{mnem}' expects {needed} operands, got {len(ops)}"
            )

        # Replace .word names with their resolved addresses; .equ names were
        # already substituted while parsing.
        if word_addrs:
            ops = [hex(word_addrs[op]) if op in word_addrs else op for op in ops]

        # Comments use the raw operand tokens so the original punctuation
        # (commas, M[...]) is preserved.
        if itype == 'A':
            rsd = parse_reg(ops[0])
            rs2 = parse_reg(ops[1])
            iw = (opcode << 4) | (rsd << 2) | rs2
            output.append((addr, iw, f"{mnem} {operand_tokens[0]} {operand_tokens[1]}"))
            addr += 1
        elif itype == 'B':
            rsd = parse_reg(ops[0])
            k = parse_const(ops[1])
            # K is a 2-bit field; higher bits are dropped.
            iw = (opcode << 4) | (rsd << 2) | (k & 0x3)
            output.append((addr, iw, f"{mnem} {operand_tokens[0]} {operand_tokens[1]}"))
            addr += 1
        elif itype == 'C':
            rsd = parse_reg(ops[0])
            ra = parse_reg(ops[1])
            offset = parse_const(ops[2])
            iw0 = (opcode << 4) | (rsd << 2) | ra
            output.append((addr, iw0, f"{mnem} {operand_tokens[0]} {operand_tokens[1]}"))
            addr += 1
            output.append((addr, offset & 0xFF, operand_tokens[2]))
            addr += 1
        elif itype == 'D':
            # Accept the condition in any case, like mnemonics and registers.
            cond_str = ops[0].upper()
            if cond_str not in JUMP_CONDS:
                raise ValueError(f"Line {lineno}: Invalid jump condition '{ops[0]}'")
            iw0 = (opcode << 4) | JUMP_CONDS[cond_str]
            output.append((addr, iw0, f"{mnem} {operand_tokens[0]}"))
            addr += 1
            if ops[1] not in labels:
                raise ValueError(f"Line {lineno}: Undefined label '{ops[1]}'")
            # The offset is relative to the address of the second word (IW1).
            jump_offset = labels[ops[1]] - addr
            output.append((addr, jump_offset & 0xFF, operand_tokens[1]))
            addr += 1

    # Emit the .word initial values after the code. addr keeps advancing so
    # the returned final address lies past the data; otherwise a fill range
    # starting at final_addr would cover (and zero) the .word bytes.
    for name, init in words:
        output.append((addr, parse_const(init) & 0xFF, f".word {name}"))
        addr += 1

    return output, addr
def write_mif(output, final_addr, out_path, depth=1024):
    """Write assembled bytes to ``out_path`` in Altera .mif format.

    Args:
        output: iterable of ``(addr, byte_val, comment)``; each entry becomes
            one content line with a hex address, the low 8 bits of the value
            in binary, and the comment.
        final_addr: first address that is not covered by ``output``. The
            range from here to the last address is filled with zeros.
        out_path: path of the file to create or overwrite.
        depth: number of memory words declared in the header (default 1024,
            i.e. addresses 0 .. 3FF).
    """
    last_addr = depth - 1
    with open(out_path, 'w') as f:
        f.write("--Program Memory Initialization File\n")
        f.write("--Created by jjg_asm\n")
        f.write("WIDTH = 8;\n")
        f.write(f"DEPTH = {depth};\n")
        f.write("ADDRESS_RADIX = HEX;\t% Can be HEX, BIN or DEC %\n")
        f.write("DATA_RADIX = BIN;\t% Can be HEX, BIN or DEC %\n")
        f.write("\nCONTENT BEGIN\n\n")
        for addr, byte_val, comment in output:
            # Mask to 8 bits so negative values appear in two's complement.
            f.write(f"{addr:04x} : {byte_val & 0xFF:08b}; % {comment} %\n")
        # Skip the fill when nothing is left to fill; otherwise the range
        # would be inverted (e.g. "[ 0400 .. 3FF ]").
        if final_addr <= last_addr:
            f.write(
                f"[ {final_addr:04x} .. {last_addr:X} ] : 00000000;"
                " % Fill the remaining locations with 0 %\n"
            )
        f.write(" END;\n")
def main():
    """Command-line entry point: assemble a source file into a .mif file.

    Usage: ``<script> <source.s> [output.mif]``. Without an explicit output
    path, a trailing ``.txt`` or ``.s`` extension of the source path is
    replaced by ``.mif``; any other source name gets ``.mif`` appended.
    Exits with status 1 and a usage message if no source path is given.
    """
    if len(sys.argv) < 2:
        print(f"Usage: {sys.argv[0]} <source.s> [output.mif]", file=sys.stderr)
        sys.exit(1)

    src = sys.argv[1]
    if len(sys.argv) >= 3:
        dst = sys.argv[2]
    else:
        # One pattern handles both extensions. Chaining two re.sub calls with
        # `and` only ever yields the second result, so .txt was never replaced.
        dst = re.sub(r'\.(txt|s)$', '.mif', src)
    # Never write over the source file: fall back to appending .mif.
    if dst == src:
        dst = src + '.mif'

    with open(src) as f:
        source = f.read()

    records, words = parse_source(source)
    output, final_addr = assemble(records, words)
    write_mif(output, final_addr, dst)
    print(f"Assembled {len(records)} instructions -> {final_addr} bytes -> {dst}")


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment