Skip to content

Instantly share code, notes, and snippets.

@JJTech0130
Created April 28, 2026 19:42
Show Gist options
  • Select an option

  • Save JJTech0130/3ebdf3b36356f639bd51c8c02dbba342 to your computer and use it in GitHub Desktop.

Select an option

Save JJTech0130/3ebdf3b36356f639bd51c8c02dbba342 to your computer and use it in GitHub Desktop.
jjgRISC assembler for Digital Systems II at RIT
"""
jjgRISC assembler
Assembles fmlRISC assembly (.s) to Altera .mif format.
Based on dxp_asm (which is written in C), but intended to be a cleaner implementation.
I added support for the .equ and .word directives. .word isn't very useful for SMIOP, because you can't load PM into registers.
Also, I fixed the jump implementation: a first pass resolves every label address so the second pass can emit the jump offsets.
I implemented support for all mnemonics for all IDNs, so hopefully it should be usable for everyone.
Usage: python3 jjg_asm.py <source.s> [output.mif]
Instruction formats (all 8-bit words):
Type A ADD SUB XOR AND OR CPY : opcode[4] Rsd[2] Rs2[2]
Type B INC DEC SHRA SHLL RRC IN OUT : opcode[4] Rsd[2] K[2]
Type C LD ST : IW0 = opcode[4] Rsd[2] Ra[2], IW1 = offset[8]
Type D JUMP : IW0 = 1101 cond[4], IW1 = (target - IW1_addr)[8]
"""
import sys
import re
# Mnemonic table: name -> (instruction type, 4-bit opcode).
# The type letter selects the encoding described in the module docstring.
# Entries tagged "other IDN" follow the original author's marking; several of
# them share an opcode with another mnemonic.
MNEMONICS = {
    'ADD':  ('A', 0b0000),
    'SUB':  ('A', 0b0001),
    'INC':  ('B', 0b0010),
    'DEC':  ('B', 0b0011),
    'XOR':  ('A', 0b0100),
    'AND':  ('A', 0b0101),
    'OR':   ('A', 0b0110),
    'CPY':  ('A', 0b0111),
    'SHRA': ('B', 0b1000),
    'SHRL': ('B', 0b1001),  # other IDN
    'SHLL': ('B', 0b1001),
    'RRC':  ('B', 0b1010),
    'RLC':  ('B', 0b1010),  # other IDN
    'LD':   ('C', 0b1011),
    'ST':   ('C', 0b1100),
    'JUMP': ('D', 0b1101),
    'IN':   ('B', 0b1110),  # other IDN
    'POP':  ('B', 0b1110),  # other IDN
    'OUT':  ('B', 0b1111),
    'PUSH': ('B', 0b1111),
}

# Register name -> 2-bit register field (R0..R3 map to 0..3).
REGS = {f'R{number}': number for number in range(4)}

# JUMP condition letter -> 4-bit condition field placed in the low nibble of IW0.
JUMP_CONDS = {
    'U': 0b0000,
    'C': 0b1000,
    'N': 0b0100,
    'V': 0b0010,
    'Z': 0b0001,
}
def parse_reg(token):
    """Translate a register name into its numeric register field.

    The name is matched case-insensitively against the REGS table.

    Raises:
        ValueError: if the name is not a key of REGS.
    """
    number = REGS.get(token.upper())
    if number is None:
        raise ValueError(f"Unknown register: {token!r}")
    return number
def parse_const(token):
    """Convert a numeric literal string to an int.

    Base 0 makes int() pick the radix from the literal's prefix, so
    ``0x..`` (hex), ``0b..`` (binary), ``0o..`` (octal) and plain decimal
    are all accepted.

    Raises:
        ValueError: if the text is not a valid integer literal.
    """
    return int(token, base=0)
def parse_operands(tokens):
    """Strip operand punctuation from whitespace-split operand tokens.

    For each token, a leading ``M[`` (any case) is removed and then any
    trailing ``,`` / ``]`` characters are stripped.
    Example: ``['R0,', 'M[R1,', '0x10]'] -> ['R0', 'R1', '0x10']``.
    """
    def _clean(raw):
        # drop the memory-reference opener before trimming the trailing punctuation
        body = raw[2:] if raw.upper().startswith('M[') else raw
        return body.rstrip(',]')

    return [_clean(raw) for raw in tokens]
def int_to_binstr(val, bits):
    """Render ``val`` as a zero-padded binary string ``bits`` digits wide.

    The value is masked to ``bits`` bits first, so negative numbers come out
    in two's complement and oversized values are truncated:

        int_to_binstr(0b10110011, 8) -> "10110011"
        int_to_binstr(-1, 8)         -> "11111111"
    """
    mask = (1 << bits) - 1
    return f"{val & mask:0{bits}b}"
def parse_source(source):
    """Parse assembly source text into instruction records and .word data.

    Sections (``.directives``, ``.constants``, ``.code`` and their ``.end*``
    markers) may appear multiple times and in any order. Everything after a
    ``;`` on a line is discarded, so ``;`` works both as an instruction
    terminator and as a comment start.

    ``.equ NAME VALUE`` (directives section) defines a textual substitution
    applied to every later line of the code section. ``.word NAME [INIT]``
    (constants section) declares a data byte; INIT defaults to ``'0x00'``.

    A label (a token starting with ``@``) may share a line with its
    instruction or stand on a line by itself, in which case it is attached
    to the next instruction. Each record has a single label slot, so when
    several labels precede one instruction only one is kept (a label on the
    instruction's own line wins, otherwise the most recent standalone one),
    and a label with no instruction after it is dropped.

    Args:
        source: full text of the assembly file.

    Returns:
        ``(records, words)`` where ``records`` is a list of
        ``(label_or_None, MNEMONIC, operand_tokens, line_number)`` tuples
        (mnemonic upper-cased, line numbers 1-based) and ``words`` is a list
        of ``(name, init_value_string)`` in declaration order.
    """
    # section keyword -> section it switches to (None = outside any section)
    section_markers = {
        '.directives': 'directives',
        '.enddirectives': None,
        '.constants': 'constants',
        '.endconstants': None,
        '.code': 'code',
        '.endcode': None,
    }
    records = []
    # .equ name -> (compiled whole-word pattern, replacement text);
    # a dict so redefining a name replaces its value in place
    equates = {}
    words = []
    section = None
    # label seen on a line of its own, waiting for the next instruction
    pending_label = None

    for lineno, line in enumerate(source.splitlines(), 1):
        line = line.split(';')[0]

        if section == 'code':
            for pattern, value in equates.values():
                # a callable replacement inserts the value literally, so a
                # backslash in the .equ value is not treated as an escape
                line = pattern.sub(lambda _match, value=value: value, line)

        tokens = line.split()
        if not tokens:
            continue
        first = tokens[0]

        if first in section_markers:
            section = section_markers[first]
            continue

        if section == 'directives' and first == '.equ' and len(tokens) >= 3:
            name = tokens[1]
            # \b keeps COUNT from being replaced inside DISCOUNT
            pattern = re.compile(r'\b' + re.escape(name) + r'\b')
            equates[name] = (pattern, tokens[2])
            continue

        if section == 'constants' and first == '.word' and len(tokens) >= 2:
            init = tokens[2] if len(tokens) >= 3 else '0x00'
            words.append((tokens[1], init))
            continue

        if section != 'code':
            continue

        label = None
        if first.startswith('@'):
            label = first
            tokens = tokens[1:]
            if not tokens:
                # label-only line: remember it instead of discarding it
                pending_label = label
                continue
            first = tokens[0]

        if label is None:
            label = pending_label
        pending_label = None
        records.append((label, first.upper(), tokens[1:], lineno))

    return records, words
def assemble(records, words=()):
    """Assemble parsed records into a list of program-memory bytes.

    Two passes are needed so that JUMP can target labels defined later in
    the source: pass 1 walks the records to find the address of every label
    (and of every .word, placed directly after the code); pass 2 emits the
    instruction bytes, including the relative JUMP offsets.

    Args:
        records: ``(label_or_None, mnemonic, operand_tokens, line_number)``
            tuples as produced by ``parse_source``.
        words: ``(name, init_value_string)`` pairs for ``.word`` data.

    Returns:
        ``(output, final_addr)`` where ``output`` is a list of
        ``(addr, byte_value, comment)`` tuples (the comment preserves the
        original assembly text for debugging) and ``final_addr`` is the
        first address after both the code and the .word data.

    Raises:
        ValueError: unknown mnemonic, too few operands, unknown register,
            malformed constant, invalid jump condition or undefined label.
    """
    # minimum operand count for each instruction type
    operand_counts = {'A': 2, 'B': 2, 'C': 3, 'D': 2}

    # Pass 1: determine label addresses
    labels = {}
    addr = 0
    for label, mnem, _, lineno in records:
        if label is not None:
            labels[label] = addr
        if mnem not in MNEMONICS:
            raise ValueError(f"Line {lineno}: Unknown mnemonic '{mnem}'")
        itype, _ = MNEMONICS[mnem]
        # types C and D are encoded as two bytes, A and B as one
        addr += 2 if itype in ('C', 'D') else 1

    # .word data lives sequentially after the code
    word_addrs = {}
    for name, _ in words:
        word_addrs[name] = addr
        addr += 1

    # Pass 2: emit the instruction bytes
    output = []
    addr = 0
    for _, mnem, operand_tokens, lineno in records:
        itype, opcode = MNEMONICS[mnem]
        ops = parse_operands(operand_tokens)

        needed = operand_counts[itype]
        if len(ops) < needed:
            # report the source line instead of failing with a bare IndexError
            raise ValueError(
                f"Line {lineno}: '{mnem}' expects {needed} operands, got {len(ops)}"
            )

        # swap .word names for their resolved addresses
        # (.equ names were already substituted while parsing)
        ops = [hex(word_addrs[op]) if op in word_addrs else op for op in ops]

        if itype == 'A':
            rsd = parse_reg(ops[0])
            rs2 = parse_reg(ops[1])
            iw = (opcode << 4) | (rsd << 2) | rs2
            # comments reuse the raw operand tokens so the ',' etc. come for free
            output.append((addr, iw, f"{mnem} {operand_tokens[0]} {operand_tokens[1]}"))
            addr += 1
        elif itype == 'B':
            rsd = parse_reg(ops[0])
            k = parse_const(ops[1])
            # K is a 2-bit field; higher bits of the constant are discarded
            iw = (opcode << 4) | (rsd << 2) | (k & 0x3)
            output.append((addr, iw, f"{mnem} {operand_tokens[0]} {operand_tokens[1]}"))
            addr += 1
        elif itype == 'C':
            rsd = parse_reg(ops[0])
            ra = parse_reg(ops[1])
            offset = parse_const(ops[2])
            iw0 = (opcode << 4) | (rsd << 2) | ra
            output.append((addr, iw0, f"{mnem} {operand_tokens[0]} {operand_tokens[1]}"))
            addr += 1
            output.append((addr, offset & 0xFF, operand_tokens[2]))
            addr += 1
        elif itype == 'D':
            # accept lower-case conditions, like mnemonics and registers
            cond_str = ops[0].upper()
            if cond_str not in JUMP_CONDS:
                raise ValueError(f"Line {lineno}: Invalid jump condition '{ops[0]}'")
            iw0 = (opcode << 4) | JUMP_CONDS[cond_str]
            output.append((addr, iw0, f"{mnem} {operand_tokens[0]}"))
            addr += 1
            target = ops[1]
            if target not in labels:
                raise ValueError(f"Line {lineno}: Undefined label '{target}'")
            # the offset is relative to the address of IW1 (the current addr)
            jump_offset = labels[target] - addr
            output.append((addr, jump_offset & 0xFF, operand_tokens[1]))
            addr += 1

    # emit .word initial values after the code, advancing addr so the
    # returned final address also covers the data bytes
    for name, init in words:
        output.append((addr, int(init, 0) & 0xFF, f".word {name}"))
        addr += 1

    return output, addr
def write_mif(output, final_addr, out_path, depth=1024):
    """Write assembled bytes to ``out_path`` as a memory initialization file.

    Args:
        output: iterable of ``(addr, byte_value, comment)`` tuples; each is
            written as one ``addr : bits; % comment %`` line.
        final_addr: first address not covered by ``output``; everything from
            here to the end of memory is zero-filled with one range entry.
        out_path: path of the .mif file to create (overwritten if present).
        depth: number of 8-bit words in the memory (default 1024).

    Raises:
        ValueError: if ``final_addr`` exceeds ``depth``, i.e. the program
            does not fit. Raised before the output file is opened.
    """
    if final_addr > depth:
        raise ValueError(
            f"Program needs {final_addr} bytes but memory depth is {depth}"
        )
    last_addr = depth - 1

    lines = [
        "--Program Memory Initialization File\n",
        "--Created by jjg_asm\n",
        "WIDTH = 8;\n",
        f"DEPTH = {depth};\n",
        "ADDRESS_RADIX = HEX;\t% Can be HEX, BIN or DEC %\n",
        "DATA_RADIX = BIN;\t% Can be HEX, BIN or DEC %\n",
        "\nCONTENT BEGIN\n\n",
    ]
    lines.extend(
        f"{addr:04x} : {int_to_binstr(byte_val, 8)}; % {comment} %\n"
        for addr, byte_val, comment in output
    )
    # when the program fills memory exactly there is nothing left to pad;
    # emitting the range anyway would produce a backwards [ start .. end ]
    if final_addr <= last_addr:
        lines.append(
            f"[ {final_addr:04x} .. {last_addr:X} ] : 00000000; "
            "% Fill the remaining locations with 0 %\n"
        )
    lines.append(" END;\n")

    with open(out_path, 'w') as f:
        f.writelines(lines)
def main():
    """Command-line entry point: assemble a source file into a .mif file.

    Usage: ``jjg_asm.py <source.s> [output.mif]``. Without an explicit output
    path, a trailing ``.txt`` or ``.s`` on the source name is replaced by
    ``.mif``; any other name just gets ``.mif`` appended. Exits with status 1
    and a usage message when no source file is given.
    """
    if len(sys.argv) < 2:
        print(f"Usage: {sys.argv[0]} <source.s> [output.mif]", file=sys.stderr)
        sys.exit(1)

    src = sys.argv[1]
    if len(sys.argv) >= 3:
        dst = sys.argv[2]
    else:
        # one pattern for both extensions; chaining two re.sub calls with
        # `and` only ever kept the result of the second one
        dst = re.sub(r'\.(txt|s)$', '.mif', src)
    # never write the output over the source file
    if dst == src:
        dst = src + '.mif'

    with open(src) as f:
        source = f.read()

    records, words = parse_source(source)
    output, final_addr = assemble(records, words)
    write_mif(output, final_addr, dst)
    print(f"Assembled {len(records)} instructions -> {final_addr} bytes -> {dst}")


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment