Skip to content

Instantly share code, notes, and snippets.

@evanthebouncy
Created April 17, 2026 15:19
Show Gist options
  • Select an option

  • Save evanthebouncy/4ad78122a156225685aa84d10dc87f4b to your computer and use it in GitHub Desktop.

Select an option

Save evanthebouncy/4ad78122a156225685aa84d10dc87f4b to your computer and use it in GitHub Desktop.
crawler environment
#!/usr/bin/env python3
"""
crawler_env.py – 2D Crawler RL Environment (Gymnasium-compatible)
=====================================================================
Robot description
-----------------
Body : rectangle, slides left/right on the floor (no tipping).
Arm : L-shaped, two rigid segments attached at the front-top of the body.
segment-1 longer, ~horizontal angle θ1 from horizontal [−30°, +30°]
segment-2 shorter, ~vertical angle θ2 from horizontal [−120°, −60°]
(θ2 = −90° → straight down; +x = right, +y = up)
Observation : np.array([θ1, θ2], dtype=float32) – angles in degrees
Actions : 0 θ1 + step 1 θ1 − step
2 θ2 + step 3 θ2 − step
Reward : Δbody_x per step (positive = rightward progress)
Physics
-------
When the foot tip is at or below the floor (world y ≤ 0) it is *planted*.
Any subsequent angle change rigidly slides the body so that the foot's
world-x stays fixed. When angles lift the foot above y = 0 it detaches.
The body can only translate horizontally (no vertical dynamics, no torque).
Rendering
---------
render=False headless – no window (default, use for training)
render=True opens a pygame window; each env.step() draws one frame and
caps the loop at AGENT_FPS (10 fps) so the agent's behaviour
is watchable in real time. Esc or closing the window stops
rendering cleanly.
Example – watch a policy run:
env = CrawlerEnv(render=True)
obs, _ = env.reset()
for _ in range(500):
obs, reward, terminated, truncated, info = env.step(policy.act(obs))
env.close()
Human-interactive mode (run this file directly)
------------------------------------------------
python crawler_env.py
W / ↑ θ1 increase S / ↓ θ1 decrease
A / ← θ2 decrease D / → θ2 increase
R reset Esc quit
Crawling hint (a working 4-step cycle)
----------------------------------------
1. W – raise θ1 to lift the foot off the ground
2. D – tilt θ2 toward −60° to push the foot forward (in air)
3. S – lower θ1 to plant the foot back on the ground
4. A – tilt θ2 toward −120° while planted → body is dragged forward
Repeat. Reward = Δbody_x per step (positive = rightward progress).
"""
import sys
import math
import numpy as np
try:
import gymnasium as gym
from gymnasium import spaces
_GYM_BASE = gym.Env
except ImportError:
_GYM_BASE = object
spaces = None
# ── world geometry ─────────────────────────────────────────────────────────────
# All distances are in world units (≈ pixels at 1:1 zoom); +x is right, +y up,
# and the floor is the line y = 0.
BODY_W = 80 # body width (world units ≈ pixels at 1:1)
BODY_H = 35 # body height (bottom rests at y=0)
SEG1 = 65 # length of horizontal arm segment
SEG2 = 55 # length of vertical arm segment
T1_MIN, T1_MAX = 15.0, 21.0 # θ1 limits in degrees → {15, 18, 21}
T2_MIN, T2_MAX = -93.0, -75.0 # θ2 limits in degrees → {-93, -90, ..., -75}
STEP = 3.0 # degrees per discrete action
# ── display ────────────────────────────────────────────────────────────────────
SW, SH = 960, 430 # window width/height in pixels
FLOOR_Y = 320 # screen-y of the floor line (screen y increases downward)
BODY_SCR = SW // 3 # screen-x anchor where the body is drawn (camera offset)
HUMAN_FPS = 60 # interactive mode
AGENT_FPS = 10 # agent-watched mode
# RGB colour palette used by the rendering code (_draw_frame).
C = dict(
bg = (232, 238, 244),
ground = ( 95, 95, 95),
floor = ( 50, 50, 50),
tick = (165, 165, 165),
tick_lbl = (145, 145, 145),
body = ( 65, 125, 200),
body_ol = ( 35, 75, 140),
seg1 = (210, 110, 45),
seg2 = (175, 70, 25),
joint = (255, 210, 0),
foot = (215, 45, 45),
planted = ( 40, 205, 60),
text = ( 20, 20, 20),
dim = (130, 130, 130),
pos = ( 35, 145, 35),
)
def w2s(wx, wy, cam_x):
    """Map world coords (x→, y↑, floor at y=0) onto screen coords (x→, y↓)."""
    screen_x = wx - cam_x + BODY_SCR
    screen_y = FLOOR_Y - wy
    return int(screen_x), int(screen_y)
# ── Environment ────────────────────────────────────────────────────────────────
class CrawlerEnv(_GYM_BASE):
    """
    Gymnasium-compatible 2D crawler.  See the module docstring for the full
    environment description.

    Observation : np.array([theta1, theta2], dtype=int32) – angles in degrees.
    Actions     : Discrete(4) – 0/1 move theta1 ±STEP, 2/3 move theta2 ±STEP.
    Reward      : horizontal body displacement per step (rightward positive).
    """
    metadata = {"render_fps": AGENT_FPS}

    def __init__(self, render=False):
        # BUG FIX: the flag used to be stored as ``self.render``, which
        # shadowed the ``render()`` method below – ``env.render()`` raised
        # ``TypeError: 'bool' object is not callable``.  It now lives in
        # ``self.render_enabled`` so the Gym render() API works.
        self.render_enabled = render
        if spaces is not None:
            self.observation_space = spaces.Box(
                low = np.array([T1_MIN, T2_MIN], dtype=np.int32),
                high = np.array([T1_MAX, T2_MAX], dtype=np.int32),
                dtype = np.int32,
            )
            self.action_space = spaces.Discrete(4)
        # pygame handles – created lazily on first render() call
        self._screen = None
        self._clock = None
        self._font = None
        self._sfont = None
        self._cam_x = 0.0
        self._init_state()

    # ── Gymnasium API ────────────────────────────────────────────────────────
    def reset(self, seed=None, options=None):
        """Reset angles and body position to the start state.

        Returns (obs, info) per the Gymnasium API.
        """
        if _GYM_BASE is not object:
            super().reset(seed=seed)  # seeds gymnasium's RNG machinery
        self._init_state()
        if self.render_enabled:
            self._ensure_pygame()
            # snap (not smooth) the camera onto the body after a reset
            self._cam_x = float(self.body_x + BODY_W / 2)
            self._draw()
        return self._obs(), {}

    def step(self, action):
        """
        Apply one of four discrete actions.
        Returns (obs, reward, terminated, truncated, info).
        terminated is always False – no goal/failure state is defined.
        info contains: planted (bool), dist (float), body_x (float).
        """
        action = int(action)
        assert action in (0, 1, 2, 3), f"action must be 0-3, got {action}"
        was_planted = self._foot_on_ground()
        if was_planted:
            # remember the planted foot's world-x before the angles move
            anchor_x = self._kinematics()[2][0]
        old_x = self.body_x
        if action == 0: self.theta1 = min(T1_MAX, self.theta1 + STEP)
        elif action == 1: self.theta1 = max(T1_MIN, self.theta1 - STEP)
        elif action == 2: self.theta2 = min(T2_MAX, self.theta2 + STEP)
        elif action == 3: self.theta2 = max(T2_MIN, self.theta2 - STEP)
        if was_planted:
            # planted foot keeps its world-x fixed → the body slides instead
            self.body_x = anchor_x - self._foot_rel_x()
        self.last_action = action
        self.planted = self._foot_on_ground()
        reward = self.body_x - old_x
        self.dist += reward
        self.n_steps += 1
        if self.render_enabled:
            self._draw()
            self._clock.tick(AGENT_FPS)
            # pump events so the window stays responsive
            import pygame
            for event in pygame.event.get():
                if event.type == pygame.QUIT:
                    self.close()
                elif event.type == pygame.KEYDOWN and event.key == pygame.K_ESCAPE:
                    self.close()
        return self._obs(), reward, False, False, {
            "planted": self.planted,
            "dist": self.dist,
            "body_x": self.body_x,
        }

    def render(self):
        """Explicit render call – draws the current frame (agent mode)."""
        if not self.render_enabled:
            return
        self._ensure_pygame()
        self._draw()

    def close(self):
        """Shut down pygame (if running) and drop the window handle."""
        if self._screen is not None:
            import pygame
            pygame.quit()
            self._screen = None

    # ── kinematics ──────────────────────────────────────────────────────────
    def _kinematics(self):
        """Return (shoulder, elbow, foot) as (x, y) pairs in world coords."""
        sx = self.body_x + BODY_W   # shoulder sits at the body's front-top corner
        sy = float(BODY_H)
        t1 = math.radians(self.theta1)
        ex = sx + SEG1 * math.cos(t1)
        ey = sy + SEG1 * math.sin(t1)
        t2 = math.radians(self.theta2)
        fx = ex + SEG2 * math.cos(t2)
        fy = ey + SEG2 * math.sin(t2)
        return (sx, sy), (ex, ey), (fx, fy)

    # ── private ─────────────────────────────────────────────────────────────
    def _init_state(self):
        """(Re)initialise all mutable episode state."""
        self.body_x = 0.0
        self.theta1 = 15.0 # foot just barely planted within MAX_FOOT_DEPTH
        self.theta2 = -90.0
        self.dist = 0.0
        self.n_steps = 0
        self.last_action = None
        self.planted = self._foot_on_ground()

    def _obs(self):
        # snap angles onto the STEP grid so observations are discrete ints
        t1 = int(round(self.theta1 / STEP) * STEP)
        t2 = int(round(self.theta2 / STEP) * STEP)
        return np.array([t1, t2], dtype=np.int32)

    def _foot_on_ground(self):
        # 0.5-unit tolerance absorbs float rounding near the floor
        return self._kinematics()[2][1] <= 0.5

    @staticmethod
    def get_legal_actions(obs):
        """Return the list of actions that won't hit a joint limit at obs."""
        t1, t2 = int(obs[0]), int(obs[1])
        legal = []
        if t1 < T1_MAX: legal.append(0) # theta1+ not yet at max
        if t1 > T1_MIN: legal.append(1) # theta1- not yet at min
        if t2 < T2_MAX: legal.append(2) # theta2+ not yet at max
        if t2 > T2_MIN: legal.append(3) # theta2- not yet at min
        return legal

    @staticmethod
    def all_states():
        """Return a list of every valid (theta1, theta2) obs as int tuples."""
        n1 = round((T1_MAX - T1_MIN) / STEP)
        n2 = round((T2_MAX - T2_MIN) / STEP)
        return [
            (int(T1_MIN + i * STEP), int(T2_MIN + j * STEP))
            for i in range(n1 + 1)
            for j in range(n2 + 1)
        ]

    def _foot_rel_x(self):
        """Foot-tip x offset relative to the body's left edge."""
        t1 = math.radians(self.theta1)
        t2 = math.radians(self.theta2)
        return BODY_W + SEG1 * math.cos(t1) + SEG2 * math.cos(t2)

    def _ensure_pygame(self):
        """Lazily create the pygame window, clock, and fonts (idempotent)."""
        import pygame
        if self._screen is None:
            pygame.init()
            pygame.display.set_caption("Crawler RL Environment")
            self._screen = pygame.display.set_mode((SW, SH))
            self._clock = pygame.time.Clock()
            self._font = pygame.font.SysFont("monospace", 16)
            self._sfont = pygame.font.SysFont("monospace", 11)
            self._cam_x = float(self.body_x + BODY_W / 2)

    def _draw(self):
        """Draw one frame into self._screen (camera smoothed toward the body)."""
        self._cam_x += (self.body_x + BODY_W / 2 - self._cam_x) * 0.2
        hints = [
            f"step {self.n_steps} Esc to quit",
        ]
        _draw_frame(self._screen, self, self._font, self._sfont, self._cam_x,
                    bottom_hints=hints)
# ── shared drawing routine ─────────────────────────────────────────────────────
def _draw_frame(screen, env, font, sfont, cam_x, bottom_hints=None):
    """Render one complete frame of *env* onto *screen* and flip the display.

    Draws the ground, distance tick marks, the body with outline and wheels,
    the two-segment arm with joints and foot marker, and a text HUD.
    """
    import pygame

    # background and ground
    screen.fill(C['bg'])
    pygame.draw.rect(screen, C['ground'], (0, FLOOR_Y, SW, SH - FLOOR_Y))
    pygame.draw.line(screen, C['floor'], (0, FLOOR_Y), (SW, FLOOR_Y), 2)

    # distance ticks every 50 world units along the floor
    first_tick = int(cam_x - BODY_SCR) // 50 * 50
    for tick in range(first_tick, first_tick + SW + 100, 50):
        tick_sx = int(tick - cam_x + BODY_SCR)
        if not (-20 <= tick_sx <= SW + 20):
            continue
        pygame.draw.line(screen, C['tick'], (tick_sx, FLOOR_Y), (tick_sx, FLOOR_Y + 6), 1)
        lbl = sfont.render(str(tick), True, C['tick_lbl'])
        screen.blit(lbl, (tick_sx - lbl.get_width() // 2, FLOOR_Y + 9))

    # body rectangle with outline and two decorative wheel circles
    shoulder, elbow, foot = env._kinematics()
    body_px, body_py = w2s(env.body_x, BODY_H, cam_x)
    body_rect = pygame.Rect(body_px, body_py, BODY_W, BODY_H)
    pygame.draw.rect(screen, C['body'], body_rect)
    pygame.draw.rect(screen, C['body_ol'], body_rect, 2)
    for wheel_off in (BODY_W // 4, 3 * BODY_W // 4):
        pygame.draw.circle(screen, C['body_ol'], (body_px + wheel_off, FLOOR_Y), 6, 2)

    # two-segment arm, joints, and colour-coded foot marker
    p_shoulder = w2s(*shoulder, cam_x)
    p_elbow = w2s(*elbow, cam_x)
    p_foot = w2s(*foot, cam_x)
    pygame.draw.line(screen, C['seg1'], p_shoulder, p_elbow, 6)
    pygame.draw.line(screen, C['seg2'], p_elbow, p_foot, 5)
    pygame.draw.circle(screen, C['joint'], p_shoulder, 7)
    pygame.draw.circle(screen, C['joint'], p_elbow, 6)
    foot_col = C['planted'] if env.planted else C['foot']
    pygame.draw.circle(screen, foot_col, p_foot, 7)
    if env.planted:
        # thin vertical line marking where the foot is anchored on the floor
        pygame.draw.line(screen, C['planted'], p_foot, (p_foot[0], FLOOR_Y), 1)

    # text HUD in the top-left corner
    _ACTION_LABELS = {0: "0 theta1+ (W)", 1: "1 theta1- (S)",
                      2: "2 theta2+ (D)", 3: "3 theta2- (A)"}
    action_str = _ACTION_LABELS.get(env.last_action, "—")
    hud = [
        (f"last action : {action_str}", C['text']),
        (f"theta1 (horiz) : {env.theta1:+6.1f} deg [{T1_MIN:+.0f}, {T1_MAX:+.0f}]", C['text']),
        (f"theta2 (vert) : {env.theta2:+6.1f} deg [{T2_MIN:+.0f}, {T2_MAX:+.0f}]", C['text']),
        (f"position : {env.body_x:+.0f}", C['pos']),
        (f"total distance : {env.dist:+.1f}", C['pos']),
        (f"foot : {'PLANTED [green dot]' if env.planted else 'lifted [red dot]'}",
         C['planted'] if env.planted else C['foot']),
    ]
    if bottom_hints:
        hud.append(("", C['text']))
        hud.extend((hint, C['dim']) for hint in bottom_hints)
    for row, (txt, col) in enumerate(hud):
        screen.blit(font.render(txt, True, col), (12, 10 + row * 23))
    pygame.display.flip()
# ── state-space trajectory plot ───────────────────────────────────────────────
def plot_trajectory(trajectory, title="State trajectory (θ1 × θ2)"):
    """
    Plot the path of states visited in the θ1 × θ2 space.

    Parameters
    ----------
    trajectory : list of (obs, action, reward, info) tuples
        As returned by get_rollout() in random_policy.py.
    title : str
        Window / figure title.

    The path is a colour-gradient line (blue → red over time) with small
    arrows showing direction of travel.  Start is marked with a green circle,
    end with a red square, and every valid grid point appears as a faint
    grey dot in the background.
    """
    import matplotlib.pyplot as plt
    import matplotlib.collections as mc
    import numpy as np

    valid = CrawlerEnv.all_states()
    grid_x = [s[0] for s in valid]
    grid_y = [s[1] for s in valid]
    xs = [int(obs[0]) for obs, *_ in trajectory]
    ys = [int(obs[1]) for obs, *_ in trajectory]
    total = len(xs)

    fig, ax = plt.subplots(figsize=(8, 6))
    fig.suptitle(title)

    # faint grid dots for all valid states
    ax.scatter(grid_x, grid_y, s=12, color="lightgrey", zorder=1, label="valid states")

    # colour-gradient path: segments coloured blue→red by time
    cmap = plt.get_cmap("coolwarm")
    denom = max(total - 1, 1)
    seg_colors = [cmap(k / denom) for k in range(total - 1)]
    segs = [[(xs[k], ys[k]), (xs[k + 1], ys[k + 1])] for k in range(total - 1)]
    ax.add_collection(mc.LineCollection(segs, colors=seg_colors, linewidths=1.5, zorder=2))

    # direction arrows every ~20 steps
    stride = max(1, total // 20)
    for k in range(0, total - 1, stride):
        if xs[k + 1] != xs[k] or ys[k + 1] != ys[k]:
            ax.annotate("", xy=(xs[k + 1], ys[k + 1]), xytext=(xs[k], ys[k]),
                        arrowprops=dict(arrowstyle="->", color=cmap(k / denom),
                                        lw=1.2), zorder=3)

    # start / end markers
    ax.scatter([xs[0]], [ys[0]], s=80, color="green", marker="o",
               zorder=4, label="start")
    ax.scatter([xs[-1]], [ys[-1]], s=80, color="red", marker="s",
               zorder=4, label="end")

    ax.set_xlabel("θ1 (horizontal arm, deg)")
    ax.set_ylabel("θ2 (vertical arm, deg)")
    ax.set_xlim(T1_MIN - 5, T1_MAX + 5)
    ax.set_ylim(T2_MIN - 5, T2_MAX + 5)
    ax.set_xticks(range(int(T1_MIN), int(T1_MAX) + 1, int(STEP) * 3))
    ax.set_yticks(range(int(T2_MIN), int(T2_MAX) + 1, int(STEP) * 3))
    ax.grid(True, linestyle=":", linewidth=0.5, alpha=0.4)
    ax.legend(loc="upper right", fontsize=8)

    # colour bar showing time progression
    sm = plt.cm.ScalarMappable(cmap=cmap, norm=plt.Normalize(0, total))
    sm.set_array([])
    fig.colorbar(sm, ax=ax, label="step")
    plt.tight_layout()
    plt.show()
# ── human-interactive main ─────────────────────────────────────────────────────
def main():
    """Run the crawler in human-interactive mode (WASD / arrow keys)."""
    import pygame
    pygame.init()
    screen = pygame.display.set_mode((SW, SH))
    pygame.display.set_caption("Crawler RL Environment – Human Play")
    clock = pygame.time.Clock()
    font = pygame.font.SysFont("monospace", 16)
    sfont = pygame.font.SysFont("monospace", 11)

    env = CrawlerEnv(render=False) # headless – this loop owns the rendering
    obs, _ = env.reset()
    cam_x = float(env.body_x + BODY_W / 2)

    # keyboard → discrete action id
    KEY_MAP = {
        pygame.K_w: 0, pygame.K_UP: 0,
        pygame.K_s: 1, pygame.K_DOWN: 1,
        pygame.K_a: 3, pygame.K_LEFT: 3,
        pygame.K_d: 2, pygame.K_RIGHT: 2,
    }
    controls = [
        "W/↑ theta1+ S/↓ theta1-",
        "A/← theta2- D/→ theta2+",
        "R reset Esc quit",
    ]
    print(__doc__)
    print("─" * 56)

    while True:
        for event in pygame.event.get():
            wants_quit = event.type == pygame.QUIT or (
                event.type == pygame.KEYDOWN and event.key == pygame.K_ESCAPE)
            if wants_quit:
                pygame.quit()
                sys.exit()
            if event.type != pygame.KEYDOWN:
                continue
            if event.key == pygame.K_r:
                obs, _ = env.reset()
                cam_x = float(env.body_x + BODY_W / 2)
                print(" [reset]")
            elif event.key in KEY_MAP:
                action = KEY_MAP[event.key]
                obs, reward, _, _, info = env.step(action)
                marker = "→" if reward > 0.05 else ("←" if reward < -0.05 else "·")
                print(f" step {env.n_steps:4d} "
                      f"θ1={obs[0]:+6.1f}° θ2={obs[1]:+7.1f}° "
                      f"reward={reward:+5.1f} {marker} "
                      f"planted={'Y' if info['planted'] else 'n'} "
                      f"total={info['dist']:+.1f}")
        # smooth camera toward the body, then draw one frame
        cam_x += (env.body_x + BODY_W / 2 - cam_x) * 0.12
        _draw_frame(screen, env, font, sfont, cam_x, bottom_hints=controls)
        clock.tick(HUMAN_FPS)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment