JJTech0130 · April 28, 2026 19:42
diff --git a/jjg_asm.py b/jjg_asm.py
 """
 jjgRISC assembler
 Assembles fmlRISC assembly (.s) to Altera .mif format.

 Based on dxp_asm, which is written in C, aimed to be implemented in a cleaner way.

 I added support for the .equ and .word directives. Note that .word is not very useful for SMIOP, because you can't load PM into registers.
 I also fixed the jump implementation: it needs an extra first pass that calculates the label addresses before the jump offsets can be emitted.

 I implemented support for all mnemonics for all IDNs, so hopefully it should be usable for everyone.

 Usage: python3 jjg_asm.py <source.s> [output.mif]

 Instruction formats (all 8-bit words):
  Type A  ADD SUB XOR AND OR CPY       : opcode[4] Rsd[2] Rs2[2]
  Type B  INC DEC SHRA SHRL SHLL RRC RLC IN POP OUT PUSH : opcode[4] Rsd[2] K[2]
  Type C  LD ST                        : IW0 = opcode[4] Rsd[2] Ra[2], IW1 = offset[8]
  Type D  JUMP                         : IW0 = 1101 cond[4],           IW1 = (target - IW1_addr)[8]
 """

 import sys
 import re

 # Mnemonic table, mapping mnemonic -> (type, opcode).
 # It is built from one row per 4-bit opcode: (opcode, instruction type, names).
 # Rows with more than one name are aliases that assemble to the same opcode.
 MNEMONICS = {
    name: (itype, opcode)
    for opcode, itype, names in (
        (0b0000, 'A', ('ADD',)),
        (0b0001, 'A', ('SUB',)),
        (0b0010, 'B', ('INC',)),
        (0b0011, 'B', ('DEC',)),
        (0b0100, 'A', ('XOR',)),
        (0b0101, 'A', ('AND',)),
        (0b0110, 'A', ('OR',)),
        (0b0111, 'A', ('CPY',)),
        (0b1000, 'B', ('SHRA',)),
        (0b1001, 'B', ('SHRL', 'SHLL')),  # SHRL: other IDN
        (0b1010, 'B', ('RRC', 'RLC')),    # RLC: other IDN
        (0b1011, 'C', ('LD',)),
        (0b1100, 'C', ('ST',)),
        (0b1101, 'D', ('JUMP',)),
        (0b1110, 'B', ('IN', 'POP')),     # both marked "other IDN" originally
        (0b1111, 'B', ('OUT', 'PUSH')),
    )
    for name in names
 }

 # Register name -> 2-bit register field value (R0..R3 -> 0..3).
 REGS = {f'R{index}': index for index in range(4)}

 # JUMP condition field encodings
 JUMP_CONDS = dict(U=0b0000, C=0b1000, N=0b0100, V=0b0010, Z=0b0001)


 def parse_reg(token):
    """Return the 2-bit register field value for a register name.

    The name is upper-cased before the lookup in REGS, so 'r1' and 'R1'
    are equivalent.

    Raises:
        ValueError: if the upper-cased token is not a key of REGS.
    """
    field = REGS.get(token.upper())
    if field is None:
        raise ValueError(f"Unknown register: {token!r}")
    return field

 def parse_const(token):
    """Convert a numeric literal token to an int.

    The radix is taken from the literal's own prefix (for example 0x for
    hexadecimal or 0b for binary); without a prefix it is read as decimal.

    Raises:
        ValueError: if int() rejects the token.
    """
    # base=0 tells int() to infer the radix from the literal itself
    value = int(token, base=0)
    return value

 def parse_operands(tokens):
    """Reduce raw operand tokens to bare operand strings.

    For every token a leading 'M[' (in any letter case) is removed, then
    all trailing ',' and ']' characters are stripped. The result has
    exactly one entry per input token, in the same order.
    """
    return [
        (raw[2:] if raw.upper().startswith('M[') else raw).rstrip(',]')
        for raw in tokens
    ]

 def int_to_binstr(val, bits):
    """Format the low `bits` bits of `val` as a zero-padded binary string.

    Negative values come out in two's complement because the value is
    masked to `bits` bits before formatting:

        int_to_binstr(0b10110011, 8) -> "10110011"
        int_to_binstr(-1, 8)         -> "11111111"
    """
    mask = (1 << bits) - 1
    return f"{val & mask:0{bits}b}"

 def parse_source(source):
    """Parse assembly source text into instruction records and .word entries.

    Lines are handled in order. Everything from the first ';' on a line is
    discarded, so ';' works both as a comment marker and as an optional
    instruction terminator. Unlike dxp_asm, the section markers
    (.directives/.constants/.code and their .end* counterparts) may appear
    any number of times and in any order:

    * directives section: ".equ NAME VALUE" registers a textual
      substitution. It is applied (whole-word, literal) to every line that
      is read while inside a code section, using the equates seen so far.
    * constants section: ".word NAME [INIT]" declares one data byte; INIT
      defaults to '0x00'.
    * code section: "[@label] MNEMONIC operand-tokens...".

    A label that stands alone on its line is attached to the next
    instruction record. If that next instruction carries its own label, or
    another standalone label follows first, the earlier label is discarded,
    because a record holds a single label. A trailing label with no
    instruction after it is discarded as well.

    Returns:
        (records, words) where records is a list of
        (label_or_None, MNEMONIC_UPPERCASE, operand_tokens, line_number)
        and words is a list of (name, init_value_string) in declaration
        order.
    """
    section_markers = {
        '.directives': 'directives',
        '.enddirectives': None,
        '.constants': 'constants',
        '.endconstants': None,
        '.code': 'code',
        '.endcode': None,
    }

    records = []
    equates = {}   # .equ name -> (compiled whole-word pattern, value string)
    words = []     # .word (name, init_value) in declaration order

    section = None         # section we are currently inside, if any
    pending_label = None   # standalone label waiting for its instruction

    for lineno, line in enumerate(source.splitlines(), 1):
        # strip comments and instruction terminators
        line = line.split(';')[0]

        # substitute .equ names before tokenizing code lines
        if section == 'code':
            for pattern, val in equates.values():
                # a callable replacement inserts val literally; a plain
                # string would have its backslashes treated as escapes
                line = pattern.sub(lambda _match, val=val: val, line)

        tokens = line.split()
        if not tokens:
            continue

        first = tokens[0]

        if first in section_markers:
            section = section_markers[first]
            continue

        if section == 'directives' and first == '.equ' and len(tokens) >= 3:
            # \b word boundaries so COUNT inside DISCOUNT is not replaced;
            # compiled once here instead of once per code line
            pattern = re.compile(r'\b' + re.escape(tokens[1]) + r'\b')
            equates[tokens[1]] = (pattern, tokens[2])
            continue

        if section == 'constants' and first == '.word' and len(tokens) >= 2:
            words.append((tokens[1], tokens[2] if len(tokens) >= 3 else '0x00'))
            continue

        if section != 'code':
            continue

        label = pending_label
        if first.startswith('@'):
            label = first
            tokens = tokens[1:]
            if not tokens:
                # label on its own line: keep it for the next instruction
                # instead of silently losing it
                pending_label = label
                continue
            first = tokens[0]
        pending_label = None

        records.append((label, first.upper(), tokens[1:], lineno))

    return records, words


 def _encode_record(mnem, ops, operand_tokens, labels, addr):
    # Encode one instruction located at `addr` into its instruction word(s).
    # Returns a list of (byte_val, comment) pairs, one per emitted byte.
    # Raises ValueError without a line prefix; assemble() adds the prefix.
    itype, opcode = MNEMONICS[mnem]

    needed = 3 if itype == 'C' else 2
    if len(ops) < needed:
        raise ValueError(f"{mnem} expects {needed} operands, got {len(ops)}")

    if itype == 'A':
        rsd = parse_reg(ops[0])
        rs2 = parse_reg(ops[1])
        iw = (opcode << 4) | (rsd << 2) | rs2
        # the raw operand tokens are used in the comment to keep the , etc.
        return [(iw, f"{mnem} {operand_tokens[0]} {operand_tokens[1]}")]

    if itype == 'B':
        rsd = parse_reg(ops[0])
        k = parse_const(ops[1])
        # only the low 2 bits of K are encoded
        iw = (opcode << 4) | (rsd << 2) | (k & 0x3)
        return [(iw, f"{mnem} {operand_tokens[0]} {operand_tokens[1]}")]

    if itype == 'C':
        rsd = parse_reg(ops[0])
        ra = parse_reg(ops[1])
        offset = parse_const(ops[2])
        iw0 = (opcode << 4) | (rsd << 2) | ra
        return [
            (iw0, f"{mnem} {operand_tokens[0]} {operand_tokens[1]}"),
            (offset & 0xFF, operand_tokens[2]),
        ]

    if itype == 'D':
        # condition letters are matched case-insensitively, like mnemonics
        # and register names
        cond_str = ops[0].upper()
        if cond_str not in JUMP_CONDS:
            raise ValueError(f"Invalid jump condition '{ops[0]}'")
        if ops[1] not in labels:
            raise ValueError(f"Undefined label '{ops[1]}'")
        iw0 = (opcode << 4) | JUMP_CONDS[cond_str]
        # IW1 holds the target relative to IW1's own address (addr + 1),
        # masked to 8 bits.
        # NOTE(review): offsets outside the 8-bit range wrap silently here;
        # confirm how the hardware treats far jumps before adding a check.
        jump_offset = labels[ops[1]] - (addr + 1)
        return [
            (iw0, f"{mnem} {operand_tokens[0]}"),
            (jump_offset & 0xFF, operand_tokens[1]),
        ]

    return []


 def assemble(records, words=()):
    """Assemble parsed records into (address, byte, comment) triples.

    Two passes are needed so that JUMP can target labels defined later:
    pass 1 walks the records to find the address of every label (type C
    and D instructions take 2 bytes, the others 1), pass 2 emits the
    instruction bytes. The .word bytes are placed sequentially right after
    the code; an operand equal to a .word name is replaced by the hex
    address of that word.

    Args:
        records: list of (label_or_None, mnemonic, operand_tokens,
            line_number) tuples.
        words: sequence of (name, init_value_string) pairs.

    Returns:
        (output, final_addr). output is a list of (addr, byte_val, comment)
        kept as text-friendly triples so the .mif can carry the original
        assembly as comments. final_addr is the first address after both
        the code and the .word data.

    Raises:
        ValueError: prefixed with "Line N:" for an unknown mnemonic,
            missing operands, a bad register or constant, an invalid jump
            condition or an undefined label. Also raised by int() for a
            malformed .word initial value.
    """
    # Pass 1: determine label addresses
    labels = {}
    addr = 0
    for label, mnem, _operands, lineno in records:
        if label is not None:
            labels[label] = addr
        if mnem not in MNEMONICS:
            raise ValueError(f"Line {lineno}: Unknown mnemonic '{mnem}'")
        # C and D get encoded to 2 bytes
        addr += 2 if MNEMONICS[mnem][0] in ('C', 'D') else 1

    # .word bytes live sequentially after the code
    code_end = addr
    word_addrs = {}
    for index, (name, _init) in enumerate(words):
        word_addrs[name] = code_end + index
    final_addr = code_end + len(words)

    # Pass 2: assemble
    output = []
    addr = 0
    for _label, mnem, operand_tokens, lineno in records:
        ops = parse_operands(operand_tokens)

        # inject the resolved .word addresses
        # (.equ was already handled during tokenization)
        if word_addrs:
            ops = [hex(word_addrs[op]) if op in word_addrs else op for op in ops]

        try:
            encoded = _encode_record(mnem, ops, operand_tokens, labels, addr)
        except ValueError as err:
            raise ValueError(f"Line {lineno}: {err}") from err

        for byte_val, comment in encoded:
            output.append((addr, byte_val, comment))
            addr += 1

    # emit .word initial values after code, each at its own address
    for index, (name, init) in enumerate(words):
        output.append((code_end + index, int(init, 0) & 0xFF, f".word {name}"))

    # report the end of code AND data, so a caller that zero-fills from
    # final_addr does not overwrite the .word bytes
    return output, final_addr


 def write_mif(output, final_addr, out_path, depth=1024):
    """Write assembled bytes to an Altera .mif file.

    Args:
        output: iterable of (addr, byte_val, comment) triples; each becomes
            one "addr : bits; % comment %" line (address in hex, data as 8
            binary digits).
        final_addr: first address that should be zero-filled.
        out_path: path of the .mif file to create or overwrite.
        depth: number of memory words declared in the header; the fill
            range ends at depth - 1. Defaults to 1024.

    Raises:
        ValueError: if the content does not fit into `depth` words. The
            check runs before the file is opened, so no partial file is
            written in that case.
    """
    entries = list(output)

    # never start the zero fill below an address that was explicitly
    # emitted, otherwise the fill range would overlap those bytes
    fill_start = max([final_addr] + [addr + 1 for addr, _byte, _comment in entries])
    if fill_start > depth:
        raise ValueError(
            f"Program needs {fill_start} words but memory depth is {depth}"
        )

    with open(out_path, 'w') as f:
        f.write("--Program Memory Initialization File\n")
        f.write("--Created by jjg_asm\n")
        f.write("WIDTH = 8;\n")
        f.write(f"DEPTH = {depth};\n")
        f.write("ADDRESS_RADIX = HEX;\t% Can be HEX, BIN or DEC %\n")
        f.write("DATA_RADIX = BIN;\t% Can be HEX, BIN or DEC %\n")
        f.write("\nCONTENT BEGIN\n\n")
        for addr, byte_val, comment in entries:
            f.write(f"{addr:04x} : {int_to_binstr(byte_val, 8)}; % {comment} %\n")
        # skip the fill line when memory is exactly full; the range would
        # otherwise start past its own end
        if fill_start < depth:
            f.write(f"[ {fill_start:04x} .. {depth - 1:X} ] : 00000000; % Fill the remaining locations with 0 %\n")
        f.write(" END;\n")


 def main():
    """Command-line entry point: assemble <source.s> into a .mif file.

    Usage: <script> <source.s> [output.mif]

    Without an explicit output path, a trailing '.txt' or '.s' on the
    source path is replaced by '.mif'; any other source name gets '.mif'
    appended. The output path is never allowed to equal the source path.
    Exits with status 1 on a usage error or an assembly error.
    """
    if len(sys.argv) < 2:
        print(f"Usage: {sys.argv[0]} <source.s> [output.mif]", file=sys.stderr)
        sys.exit(1)

    src = sys.argv[1]
    if len(sys.argv) >= 3:
        dst = sys.argv[2]
    else:
        # one pattern for both extensions; chaining two re.sub() calls with
        # `and` only ever returned the result of the second one
        dst = re.sub(r'\.(?:txt|s)$', '.mif', src)
    if dst == src:
        # no known extension (or same path given twice): append instead, so
        # the source file is never overwritten
        dst = src + '.mif'

    with open(src) as f:
        source = f.read()

    try:
        records, words = parse_source(source)
        output, final_addr = assemble(records, words)
        write_mif(output, final_addr, dst)
    except ValueError as err:
        # assembly errors are user errors: report them without a traceback
        print(f"Error: {err}", file=sys.stderr)
        sys.exit(1)

    print(f"Assembled {len(records)} instructions -> {final_addr} bytes -> {dst}")


 # Script entry point: run the assembler only when this file is executed
 # directly, not when it is imported as a module.
 if __name__ == '__main__':
    main()
	"""
	jjgRISC assembler
	Assembles fmlRISC assembly (.s) to Altera .mif format.

	Based on dxp_asm, which is written in C, aimed to be implemented in a cleaner way.

	I added support for the .equ and .word directives. Note that .word is not very useful for SMIOP, because you can't load PM into registers.
	I also fixed the jump implementation: it needs an extra first pass that calculates the label addresses before the jump offsets can be emitted.

	I implemented support for all mnemonics for all IDNs, so hopefully it should be usable for everyone.

	Usage: python3 jjg_asm.py <source.s> [output.mif]

	Instruction formats (all 8-bit words):
	Type A ADD SUB XOR AND OR CPY : opcode[4] Rsd[2] Rs2[2]
	Type B INC DEC SHRA SHRL SHLL RRC RLC IN POP OUT PUSH : opcode[4] Rsd[2] K[2]
	Type C LD ST : IW0 = opcode[4] Rsd[2] Ra[2], IW1 = offset[8]
	Type D JUMP : IW0 = 1101 cond[4], IW1 = (target - IW1_addr)[8]
	"""

	import sys
	import re

	# Mnemonic table, mapping mnemonic -> (type, opcode).
	# It is built from one row per 4-bit opcode: (opcode, instruction type, names).
	# Rows with more than one name are aliases that assemble to the same opcode.
	MNEMONICS = {
	    name: (itype, opcode)
	    for opcode, itype, names in (
	        (0b0000, 'A', ('ADD',)),
	        (0b0001, 'A', ('SUB',)),
	        (0b0010, 'B', ('INC',)),
	        (0b0011, 'B', ('DEC',)),
	        (0b0100, 'A', ('XOR',)),
	        (0b0101, 'A', ('AND',)),
	        (0b0110, 'A', ('OR',)),
	        (0b0111, 'A', ('CPY',)),
	        (0b1000, 'B', ('SHRA',)),
	        (0b1001, 'B', ('SHRL', 'SHLL')),  # SHRL: other IDN
	        (0b1010, 'B', ('RRC', 'RLC')),    # RLC: other IDN
	        (0b1011, 'C', ('LD',)),
	        (0b1100, 'C', ('ST',)),
	        (0b1101, 'D', ('JUMP',)),
	        (0b1110, 'B', ('IN', 'POP')),     # both marked "other IDN" originally
	        (0b1111, 'B', ('OUT', 'PUSH')),
	    )
	    for name in names
	}

	# Register name -> 2-bit register field value (R0..R3 -> 0..3).
	REGS = {f'R{index}': index for index in range(4)}

	# JUMP condition field encodings
	JUMP_CONDS = dict(U=0b0000, C=0b1000, N=0b0100, V=0b0010, Z=0b0001)


	def parse_reg(token):
	    """Return the 2-bit register field value for a register name.

	    The name is upper-cased before the lookup in REGS, so 'r1' and 'R1'
	    are equivalent.

	    Raises:
	        ValueError: if the upper-cased token is not a key of REGS.
	    """
	    # normalize to uppercase, then look up in REGS
	    r = token.upper()
	    if r not in REGS:
	        raise ValueError(f"Unknown register: {token!r}")
	    return REGS[r]

	def parse_const(token):
	    """Convert a numeric literal token to an int.

	    The radix is taken from the literal's own prefix (for example 0x for
	    hexadecimal or 0b for binary); without a prefix it is read as decimal.

	    Raises:
	        ValueError: if int() rejects the token.
	    """
	    # base 0 means int() infers the base from the literal itself
	    return int(token, 0)

	def parse_operands(tokens):
	    """Reduce raw operand tokens to bare operand strings.

	    For every token a leading 'M[' (in any letter case) is removed, then
	    all trailing ',' and ']' characters are stripped. The result has
	    exactly one entry per input token, in the same order.
	    """
	    result = []
	    for tok in tokens:
	        if tok.upper().startswith('M['):
	            tok = tok[2:]
	        result.append(tok.rstrip(',]'))
	    return result

	def int_to_binstr(val, bits):
	    """Format the low `bits` bits of `val` as a zero-padded binary string.

	    Negative values come out in two's complement because the value is
	    masked to `bits` bits before formatting:

	        int_to_binstr(0b10110011, 8) -> "10110011"
	        int_to_binstr(-1, 8)         -> "11111111"
	    """
	    return format(val & ((1 << bits) - 1), f'0{bits}b')

	def parse_source(source):
	    """Parse assembly source text into instruction records and .word entries.

	    Lines are handled in order. Everything from the first ';' on a line is
	    discarded, so ';' works both as a comment marker and as an optional
	    instruction terminator. Unlike dxp_asm, the section markers
	    (.directives/.constants/.code and their .end* counterparts) may appear
	    any number of times and in any order:

	    * directives section: ".equ NAME VALUE" registers a textual
	      substitution. It is applied (whole-word, literal) to every line that
	      is read while inside a code section, using the equates seen so far.
	    * constants section: ".word NAME [INIT]" declares one data byte; INIT
	      defaults to '0x00'.
	    * code section: "[@label] MNEMONIC operand-tokens...".

	    A label that stands alone on its line is attached to the next
	    instruction record. If that next instruction carries its own label, or
	    another standalone label follows first, the earlier label is discarded,
	    because a record holds a single label. A trailing label with no
	    instruction after it is discarded as well.

	    Returns:
	        (records, words) where records is a list of
	        (label_or_None, MNEMONIC_UPPERCASE, operand_tokens, line_number)
	        and words is a list of (name, init_value_string) in declaration
	        order.
	    """
	    section_markers = {
	        '.directives': 'directives',
	        '.enddirectives': None,
	        '.constants': 'constants',
	        '.endconstants': None,
	        '.code': 'code',
	        '.endcode': None,
	    }

	    records = []
	    equates = {}   # .equ name -> (compiled whole-word pattern, value string)
	    words = []     # .word (name, init_value) in declaration order

	    section = None         # section we are currently inside, if any
	    pending_label = None   # standalone label waiting for its instruction

	    for lineno, line in enumerate(source.splitlines(), 1):
	        # strip comments and instruction terminators
	        line = line.split(';')[0]

	        # substitute .equ names before tokenizing code lines
	        if section == 'code':
	            for pattern, val in equates.values():
	                # a callable replacement inserts val literally; a plain
	                # string would have its backslashes treated as escapes
	                line = pattern.sub(lambda _match, val=val: val, line)

	        tokens = line.split()
	        if not tokens:
	            continue

	        first = tokens[0]

	        if first in section_markers:
	            section = section_markers[first]
	            continue

	        if section == 'directives' and first == '.equ' and len(tokens) >= 3:
	            # \b word boundaries so COUNT inside DISCOUNT is not replaced;
	            # compiled once here instead of once per code line
	            pattern = re.compile(r'\b' + re.escape(tokens[1]) + r'\b')
	            equates[tokens[1]] = (pattern, tokens[2])
	            continue

	        if section == 'constants' and first == '.word' and len(tokens) >= 2:
	            words.append((tokens[1], tokens[2] if len(tokens) >= 3 else '0x00'))
	            continue

	        if section != 'code':
	            continue

	        label = pending_label
	        if first.startswith('@'):
	            label = first
	            tokens = tokens[1:]
	            if not tokens:
	                # label on its own line: keep it for the next instruction
	                # instead of silently losing it
	                pending_label = label
	                continue
	            first = tokens[0]
	        pending_label = None

	        records.append((label, first.upper(), tokens[1:], lineno))

	    return records, words


	def _encode_record(mnem, ops, operand_tokens, labels, addr):
	    # Encode one instruction located at `addr` into its instruction word(s).
	    # Returns a list of (byte_val, comment) pairs, one per emitted byte.
	    # Raises ValueError without a line prefix; assemble() adds the prefix.
	    itype, opcode = MNEMONICS[mnem]

	    needed = 3 if itype == 'C' else 2
	    if len(ops) < needed:
	        raise ValueError(f"{mnem} expects {needed} operands, got {len(ops)}")

	    if itype == 'A':
	        rsd = parse_reg(ops[0])
	        rs2 = parse_reg(ops[1])
	        iw = (opcode << 4) | (rsd << 2) | rs2
	        # the raw operand tokens are used in the comment to keep the , etc.
	        return [(iw, f"{mnem} {operand_tokens[0]} {operand_tokens[1]}")]

	    if itype == 'B':
	        rsd = parse_reg(ops[0])
	        k = parse_const(ops[1])
	        # only the low 2 bits of K are encoded
	        iw = (opcode << 4) | (rsd << 2) | (k & 0x3)
	        return [(iw, f"{mnem} {operand_tokens[0]} {operand_tokens[1]}")]

	    if itype == 'C':
	        rsd = parse_reg(ops[0])
	        ra = parse_reg(ops[1])
	        offset = parse_const(ops[2])
	        iw0 = (opcode << 4) | (rsd << 2) | ra
	        return [
	            (iw0, f"{mnem} {operand_tokens[0]} {operand_tokens[1]}"),
	            (offset & 0xFF, operand_tokens[2]),
	        ]

	    if itype == 'D':
	        # condition letters are matched case-insensitively, like mnemonics
	        # and register names
	        cond_str = ops[0].upper()
	        if cond_str not in JUMP_CONDS:
	            raise ValueError(f"Invalid jump condition '{ops[0]}'")
	        if ops[1] not in labels:
	            raise ValueError(f"Undefined label '{ops[1]}'")
	        iw0 = (opcode << 4) | JUMP_CONDS[cond_str]
	        # IW1 holds the target relative to IW1's own address (addr + 1),
	        # masked to 8 bits.
	        # NOTE(review): offsets outside the 8-bit range wrap silently here;
	        # confirm how the hardware treats far jumps before adding a check.
	        jump_offset = labels[ops[1]] - (addr + 1)
	        return [
	            (iw0, f"{mnem} {operand_tokens[0]}"),
	            (jump_offset & 0xFF, operand_tokens[1]),
	        ]

	    return []


	def assemble(records, words=()):
	    """Assemble parsed records into (address, byte, comment) triples.

	    Two passes are needed so that JUMP can target labels defined later:
	    pass 1 walks the records to find the address of every label (type C
	    and D instructions take 2 bytes, the others 1), pass 2 emits the
	    instruction bytes. The .word bytes are placed sequentially right after
	    the code; an operand equal to a .word name is replaced by the hex
	    address of that word.

	    Args:
	        records: list of (label_or_None, mnemonic, operand_tokens,
	            line_number) tuples.
	        words: sequence of (name, init_value_string) pairs.

	    Returns:
	        (output, final_addr). output is a list of (addr, byte_val, comment)
	        kept as text-friendly triples so the .mif can carry the original
	        assembly as comments. final_addr is the first address after both
	        the code and the .word data.

	    Raises:
	        ValueError: prefixed with "Line N:" for an unknown mnemonic,
	            missing operands, a bad register or constant, an invalid jump
	            condition or an undefined label. Also raised by int() for a
	            malformed .word initial value.
	    """
	    # Pass 1: determine label addresses
	    labels = {}
	    addr = 0
	    for label, mnem, _operands, lineno in records:
	        if label is not None:
	            labels[label] = addr
	        if mnem not in MNEMONICS:
	            raise ValueError(f"Line {lineno}: Unknown mnemonic '{mnem}'")
	        # C and D get encoded to 2 bytes
	        addr += 2 if MNEMONICS[mnem][0] in ('C', 'D') else 1

	    # .word bytes live sequentially after the code
	    code_end = addr
	    word_addrs = {}
	    for index, (name, _init) in enumerate(words):
	        word_addrs[name] = code_end + index
	    final_addr = code_end + len(words)

	    # Pass 2: assemble
	    output = []
	    addr = 0
	    for _label, mnem, operand_tokens, lineno in records:
	        ops = parse_operands(operand_tokens)

	        # inject the resolved .word addresses
	        # (.equ was already handled during tokenization)
	        if word_addrs:
	            ops = [hex(word_addrs[op]) if op in word_addrs else op for op in ops]

	        try:
	            encoded = _encode_record(mnem, ops, operand_tokens, labels, addr)
	        except ValueError as err:
	            raise ValueError(f"Line {lineno}: {err}") from err

	        for byte_val, comment in encoded:
	            output.append((addr, byte_val, comment))
	            addr += 1

	    # emit .word initial values after code, each at its own address
	    for index, (name, init) in enumerate(words):
	        output.append((code_end + index, int(init, 0) & 0xFF, f".word {name}"))

	    # report the end of code AND data, so a caller that zero-fills from
	    # final_addr does not overwrite the .word bytes
	    return output, final_addr


	def write_mif(output, final_addr, out_path, depth=1024):
	    """Write assembled bytes to an Altera .mif file.

	    Args:
	        output: iterable of (addr, byte_val, comment) triples; each becomes
	            one "addr : bits; % comment %" line (address in hex, data as 8
	            binary digits).
	        final_addr: first address that should be zero-filled.
	        out_path: path of the .mif file to create or overwrite.
	        depth: number of memory words declared in the header; the fill
	            range ends at depth - 1. Defaults to 1024.

	    Raises:
	        ValueError: if the content does not fit into `depth` words. The
	            check runs before the file is opened, so no partial file is
	            written in that case.
	    """
	    entries = list(output)

	    # never start the zero fill below an address that was explicitly
	    # emitted, otherwise the fill range would overlap those bytes
	    fill_start = max([final_addr] + [addr + 1 for addr, _byte, _comment in entries])
	    if fill_start > depth:
	        raise ValueError(
	            f"Program needs {fill_start} words but memory depth is {depth}"
	        )

	    with open(out_path, 'w') as f:
	        f.write("--Program Memory Initialization File\n")
	        f.write("--Created by jjg_asm\n")
	        f.write("WIDTH = 8;\n")
	        f.write(f"DEPTH = {depth};\n")
	        f.write("ADDRESS_RADIX = HEX;\t% Can be HEX, BIN or DEC %\n")
	        f.write("DATA_RADIX = BIN;\t% Can be HEX, BIN or DEC %\n")
	        f.write("\nCONTENT BEGIN\n\n")
	        for addr, byte_val, comment in entries:
	            f.write(f"{addr:04x} : {int_to_binstr(byte_val, 8)}; % {comment} %\n")
	        # skip the fill line when memory is exactly full; the range would
	        # otherwise start past its own end
	        if fill_start < depth:
	            f.write(f"[ {fill_start:04x} .. {depth - 1:X} ] : 00000000; % Fill the remaining locations with 0 %\n")
	        f.write(" END;\n")


	def main():
	    """Command-line entry point: assemble <source.s> into a .mif file.

	    Usage: <script> <source.s> [output.mif]

	    Without an explicit output path, a trailing '.txt' or '.s' on the
	    source path is replaced by '.mif'; any other source name gets '.mif'
	    appended. The output path is never allowed to equal the source path.
	    Exits with status 1 on a usage error or an assembly error.
	    """
	    if len(sys.argv) < 2:
	        print(f"Usage: {sys.argv[0]} <source.s> [output.mif]", file=sys.stderr)
	        sys.exit(1)

	    src = sys.argv[1]
	    if len(sys.argv) >= 3:
	        dst = sys.argv[2]
	    else:
	        # one pattern for both extensions; chaining two re.sub() calls with
	        # `and` only ever returned the result of the second one
	        dst = re.sub(r'\.(?:txt|s)$', '.mif', src)
	    if dst == src:
	        # no known extension (or same path given twice): append instead, so
	        # the source file is never overwritten
	        dst = src + '.mif'

	    with open(src) as f:
	        source = f.read()

	    try:
	        records, words = parse_source(source)
	        output, final_addr = assemble(records, words)
	        write_mif(output, final_addr, dst)
	    except ValueError as err:
	        # assembly errors are user errors: report them without a traceback
	        print(f"Error: {err}", file=sys.stderr)
	        sys.exit(1)

	    print(f"Assembled {len(records)} instructions -> {final_addr} bytes -> {dst}")


	# Script entry point: run the assembler only when this file is executed
	# directly, not when it is imported as a module.
	if __name__ == '__main__':
	    main()
No results found