Created
April 17, 2026 15:19
-
-
Save evanthebouncy/4ad78122a156225685aa84d10dc87f4b to your computer and use it in GitHub Desktop.
crawler environment
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| """ | |
| crawler_env.py – 2D Crawler RL Environment (Gymnasium-compatible) | |
| ===================================================================== | |
| Robot description | |
| ----------------- | |
| Body : rectangle, slides left/right on the floor (no tipping). | |
| Arm : L-shaped, two rigid segments attached at the front-top of the body. | |
| segment-1 longer, ~horizontal angle θ1 from horizontal [+15°, +21°] | |
| segment-2 shorter, ~vertical angle θ2 from horizontal [−93°, −75°] | |
| (θ2 = −90° → straight down; +x = right, +y = up) | |
| Observation : np.array([θ1, θ2], dtype=int32) – angles in degrees (multiples of 3°) | |
| Actions : 0 θ1 + step 1 θ1 − step | |
| 2 θ2 + step 3 θ2 − step | |
| Reward : Δbody_x per step (positive = rightward progress) | |
| Physics | |
| ------- | |
| When the foot tip is at or below the floor, with a 0.5-unit tolerance (world y ≤ 0.5), it is *planted*. | |
| Any subsequent angle change rigidly slides the body so that the foot's | |
| world-x stays fixed. When angles lift the foot above y = 0 it detaches. | |
| The body can only translate horizontally (no vertical dynamics, no torque). | |
| Rendering | |
| --------- | |
| render=False headless – no window (default, use for training) | |
| render=True opens a pygame window; each env.step() draws one frame and | |
| caps the loop at AGENT_FPS (10 fps) so the agent's behaviour | |
| is watchable in real time. Esc or closing the window stops | |
| rendering cleanly. | |
| Example – watch a policy run: | |
| env = CrawlerEnv(render=True) | |
| obs, _ = env.reset() | |
| for _ in range(500): | |
| obs, reward, terminated, truncated, info = env.step(policy.act(obs)) | |
| env.close() | |
| Human-interactive mode (run this file directly) | |
| ------------------------------------------------ | |
| python crawler_env.py | |
| W / ↑ θ1 increase S / ↓ θ1 decrease | |
| A / ← θ2 decrease D / → θ2 increase | |
| R reset Esc quit | |
| Crawling hint (a working 4-step cycle) | |
| ---------------------------------------- | |
| 1. W – raise θ1 to lift the foot off the ground | |
| 2. D – tilt θ2 toward −60° to push the foot forward (in air) | |
| 3. S – lower θ1 to plant the foot back on the ground | |
| 4. A – tilt θ2 toward −120° while planted → body is dragged forward | |
| Repeat. Reward = Δbody_x per step (positive = rightward progress). | |
| """ | |
| import sys | |
| import math | |
| import numpy as np | |
| try: | |
| import gymnasium as gym | |
| from gymnasium import spaces | |
| _GYM_BASE = gym.Env | |
| except ImportError: | |
| _GYM_BASE = object | |
| spaces = None | |
# ── world geometry ─────────────────────────────────────────────────────────────
# Lengths are in world units, which are drawn 1:1 as pixels.
BODY_W = 80  # width of the body rectangle
BODY_H = 35  # height of the body; its underside rests on y = 0
SEG1 = 65    # length of the first (roughly horizontal) arm segment
SEG2 = 55    # length of the second (roughly vertical) arm segment

# Joint limits in degrees.  Stepping by STEP from the lower limit gives
#   θ1 ∈ {15, 18, 21}   and   θ2 ∈ {-93, -90, ..., -75}.
T1_MIN = 15.0
T1_MAX = 21.0
T2_MIN = -93.0
T2_MAX = -75.0
STEP = 3.0   # degrees moved by one discrete action

# ── display ────────────────────────────────────────────────────────────────────
SW = 960           # window width in pixels
SH = 430           # window height in pixels
FLOOR_Y = 320      # screen-y of the floor line (screen y grows downward)
BODY_SCR = SW // 3 # screen-x at which the camera's world-x is drawn (see w2s)
HUMAN_FPS = 60     # frame cap for the interactive loop
AGENT_FPS = 10     # frame cap when an agent is being watched

# RGB palette used by the drawing code.
C = {
    "bg": (232, 238, 244),
    "ground": (95, 95, 95),
    "floor": (50, 50, 50),
    "tick": (165, 165, 165),
    "tick_lbl": (145, 145, 145),
    "body": (65, 125, 200),
    "body_ol": (35, 75, 140),
    "seg1": (210, 110, 45),
    "seg2": (175, 70, 25),
    "joint": (255, 210, 0),
    "foot": (215, 45, 45),
    "planted": (40, 205, 60),
    "text": (20, 20, 20),
    "dim": (130, 130, 130),
    "pos": (35, 145, 35),
}
def w2s(wx, wy, cam_x):
    """Convert a world point to integer screen pixel coordinates.

    World space has x to the right and y upward with the floor at y = 0;
    screen space has x to the right and y downward.  The world x equal to
    ``cam_x`` lands on screen column ``BODY_SCR`` and world y = 0 lands on
    screen row ``FLOOR_Y``.

    Returns
    -------
    tuple of (int, int)
        ``(screen_x, screen_y)``, each truncated with ``int()``.
    """
    screen_x = int(wx - cam_x + BODY_SCR)
    screen_y = int(FLOOR_Y - wy)
    return screen_x, screen_y
| # ── Environment ──────────────────────────────────────────────────────────────── | |
class CrawlerEnv(_GYM_BASE):
    """Gymnasium-compatible 2D crawler environment.

    The state is the body's world x position plus two joint angles in
    degrees (``theta1``, ``theta2``).  Each of the four discrete actions
    moves one angle by ``STEP`` degrees, clamped to its joint limits.  If the
    foot is on the ground when an action starts, the body is shifted so that
    the foot's world x stays where it was; the reward for the step is the
    resulting change in ``body_x``.

    Parameters
    ----------
    render : bool
        When true, a pygame window is opened lazily and ``reset()`` /
        ``step()`` each draw a frame; ``step()`` is capped at ``AGENT_FPS``.
        Pressing Esc or closing the window turns rendering off for the rest
        of the environment's life.
    """

    metadata = {"render_fps": AGENT_FPS}

    def __init__(self, render=False):
        # Stored under a private name: an instance attribute called
        # ``render`` would shadow the render() method below and make
        # ``env.render()`` fail with "'bool' object is not callable".
        self._render_enabled = bool(render)

        if spaces is not None:
            self.observation_space = spaces.Box(
                low=np.array([T1_MIN, T2_MIN], dtype=np.int32),
                high=np.array([T1_MAX, T2_MAX], dtype=np.int32),
                dtype=np.int32,
            )
            self.action_space = spaces.Discrete(4)

        # pygame handles – created lazily by _ensure_pygame()
        self._screen = None
        self._clock = None
        self._font = None
        self._sfont = None
        self._cam_x = 0.0

        self._init_state()

    # ── Gymnasium API ────────────────────────────────────────────────────────
    def reset(self, seed=None, options=None):
        """Restore the initial pose and return ``(obs, info)``.

        ``seed`` is forwarded to the Gymnasium base class when Gymnasium is
        installed; ``options`` is accepted for API compatibility and ignored.
        ``info`` is always an empty dict.
        """
        if _GYM_BASE is not object:
            super().reset(seed=seed)
        self._init_state()
        if self._render_enabled:
            self._ensure_pygame()
            # Snap the camera to the body instead of easing towards it.
            self._cam_x = float(self.body_x + BODY_W / 2)
            self._draw()
        return self._obs(), {}

    def step(self, action):
        """Apply one of the four discrete actions.

        Actions: 0 = theta1 + STEP, 1 = theta1 - STEP,
                 2 = theta2 + STEP, 3 = theta2 - STEP (all clamped).

        Returns ``(obs, reward, terminated, truncated, info)``.
        ``terminated`` and ``truncated`` are always False – no goal/failure
        state is defined.  ``reward`` is the change in ``body_x`` caused by
        this step.  ``info`` contains: planted (bool), dist (float),
        body_x (float).
        """
        action = int(action)
        assert action in (0, 1, 2, 3), f"action must be 0-3, got {action}"

        # Remember where the foot is *before* the joints move; if it is on
        # the ground the body must slide to keep that point fixed.
        was_planted = self._foot_on_ground()
        if was_planted:
            anchor_x = self._kinematics()[2][0]
        old_x = self.body_x

        if action == 0:
            self.theta1 = min(T1_MAX, self.theta1 + STEP)
        elif action == 1:
            self.theta1 = max(T1_MIN, self.theta1 - STEP)
        elif action == 2:
            self.theta2 = min(T2_MAX, self.theta2 + STEP)
        elif action == 3:
            self.theta2 = max(T2_MIN, self.theta2 - STEP)

        if was_planted:
            self.body_x = anchor_x - self._foot_rel_x()

        self.last_action = action
        self.planted = self._foot_on_ground()
        reward = self.body_x - old_x
        self.dist += reward
        self.n_steps += 1

        if self._render_enabled:
            self._render_step()

        return self._obs(), reward, False, False, {
            "planted": self.planted,
            "dist": self.dist,
            "body_x": self.body_x,
        }

    def render(self):
        """Explicit render call – draws the current frame (agent mode).

        Does nothing when the environment was created with ``render=False``
        or after the user has closed the window.
        """
        if not self._render_enabled:
            return
        self._ensure_pygame()
        self._draw()

    def close(self):
        """Shut pygame down if a window was opened; safe to call repeatedly."""
        if self._screen is not None:
            import pygame
            pygame.quit()
            self._screen = None

    # ── kinematics ──────────────────────────────────────────────────────────
    def _kinematics(self):
        """Return (shoulder, elbow, foot) in world coords, each an (x, y) pair."""
        # The shoulder sits at the front-top corner of the body.
        sx = self.body_x + BODY_W
        sy = float(BODY_H)
        t1 = math.radians(self.theta1)
        ex = sx + SEG1 * math.cos(t1)
        ey = sy + SEG1 * math.sin(t1)
        t2 = math.radians(self.theta2)
        fx = ex + SEG2 * math.cos(t2)
        fy = ey + SEG2 * math.sin(t2)
        return (sx, sy), (ex, ey), (fx, fy)

    # ── private ─────────────────────────────────────────────────────────────
    def _init_state(self):
        """Set every piece of episode state to its starting value."""
        self.body_x = 0.0
        # At theta1 = 15, theta2 = -90 the foot is below the 0.5 threshold
        # used by _foot_on_ground(), so the episode starts planted.
        self.theta1 = 15.0
        self.theta2 = -90.0
        self.dist = 0.0
        self.n_steps = 0
        self.last_action = None
        self.planted = self._foot_on_ground()

    def _obs(self):
        """Return [theta1, theta2] snapped to multiples of STEP as int32."""
        t1 = int(round(self.theta1 / STEP) * STEP)
        t2 = int(round(self.theta2 / STEP) * STEP)
        return np.array([t1, t2], dtype=np.int32)

    def _foot_on_ground(self):
        """Return True when the foot's world y is at most 0.5."""
        return self._kinematics()[2][1] <= 0.5

    @staticmethod
    def get_legal_actions(obs):
        """Return the list of actions that won't hit a joint limit at obs."""
        t1, t2 = int(obs[0]), int(obs[1])
        legal = []
        if t1 < T1_MAX:
            legal.append(0)  # theta1+ not yet at max
        if t1 > T1_MIN:
            legal.append(1)  # theta1- not yet at min
        if t2 < T2_MAX:
            legal.append(2)  # theta2+ not yet at max
        if t2 > T2_MIN:
            legal.append(3)  # theta2- not yet at min
        return legal

    @staticmethod
    def all_states():
        """Return a list of every valid (theta1, theta2) obs as int tuples."""
        n1 = round((T1_MAX - T1_MIN) / STEP)
        n2 = round((T2_MAX - T2_MIN) / STEP)
        return [
            (int(T1_MIN + i * STEP), int(T2_MIN + j * STEP))
            for i in range(n1 + 1)
            for j in range(n2 + 1)
        ]

    def _foot_rel_x(self):
        """Return the foot's x offset from ``body_x`` for the current angles."""
        t1 = math.radians(self.theta1)
        t2 = math.radians(self.theta2)
        return BODY_W + SEG1 * math.cos(t1) + SEG2 * math.cos(t2)

    def _ensure_pygame(self):
        """Create the pygame window, clock and fonts on first use."""
        if self._screen is None:
            import pygame
            pygame.init()
            pygame.display.set_caption("Crawler RL Environment")
            self._screen = pygame.display.set_mode((SW, SH))
            self._clock = pygame.time.Clock()
            self._font = pygame.font.SysFont("monospace", 16)
            self._sfont = pygame.font.SysFont("monospace", 11)
            self._cam_x = float(self.body_x + BODY_W / 2)

    def _render_step(self):
        """Draw one frame for step(), throttle it, and handle quit requests."""
        import pygame

        # step() may legitimately be called before any reset(); make sure
        # the window and clock exist before they are used.
        self._ensure_pygame()
        self._draw()
        self._clock.tick(AGENT_FPS)

        # pump events so the window stays responsive
        quit_requested = False
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                quit_requested = True
            elif event.type == pygame.KEYDOWN and event.key == pygame.K_ESCAPE:
                quit_requested = True

        if quit_requested:
            self.close()
            # Without this the next step() would try to draw onto the
            # window that was just destroyed.
            self._render_enabled = False

    def _draw(self):
        """Draw one frame into self._screen (camera smoothed)."""
        # Ease the camera 20% of the way towards the body centre per frame.
        self._cam_x += (self.body_x + BODY_W / 2 - self._cam_x) * 0.2
        hints = [
            f"step {self.n_steps} Esc to quit",
        ]
        _draw_frame(self._screen, self, self._font, self._sfont, self._cam_x,
                    bottom_hints=hints)
| # ── shared drawing routine ───────────────────────────────────────────────────── | |
def _draw_frame(screen, env, font, sfont, cam_x, bottom_hints=None):
    """Paint one complete frame and flip the display.

    Draws, in order: background and ground, ruler ticks every 50 world
    units, the body with its two wheels, both arm segments with joints, the
    foot marker, and the text HUD.

    Parameters
    ----------
    screen : pygame surface to draw on.
    env : object providing ``_kinematics()``, ``body_x``, ``theta1``,
        ``theta2``, ``dist``, ``planted`` and ``last_action``.
    font, sfont : pygame fonts for the HUD and for the tick labels.
    cam_x : world x that is mapped to screen column ``BODY_SCR``.
    bottom_hints : optional iterable of extra HUD lines, drawn dimmed after
        a blank separator line.
    """
    import pygame

    # ── backdrop ──
    screen.fill(C['bg'])
    pygame.draw.rect(screen, C['ground'], (0, FLOOR_Y, SW, SH - FLOOR_Y))
    pygame.draw.line(screen, C['floor'], (0, FLOOR_Y), (SW, FLOOR_Y), 2)

    # ── ruler ticks ──
    left_tick = int(cam_x - BODY_SCR) // 50 * 50
    for world_x in range(left_tick, left_tick + SW + 100, 50):
        col = int(world_x - cam_x + BODY_SCR)
        if col < -20 or col > SW + 20:
            continue
        pygame.draw.line(screen, C['tick'], (col, FLOOR_Y), (col, FLOOR_Y + 6), 1)
        label = sfont.render(str(world_x), True, C['tick_lbl'])
        screen.blit(label, (col - label.get_width() // 2, FLOOR_Y + 9))

    # ── body and wheels ──
    shoulder, elbow, foot = env._kinematics()
    body_left, body_top = w2s(env.body_x, BODY_H, cam_x)
    rect = pygame.Rect(body_left, body_top, BODY_W, BODY_H)
    pygame.draw.rect(screen, C['body'], rect)
    pygame.draw.rect(screen, C['body_ol'], rect, 2)
    for offset in (BODY_W // 4, 3 * BODY_W // 4):
        pygame.draw.circle(screen, C['body_ol'], (body_left + offset, FLOOR_Y), 6, 2)

    # ── arm ──
    shoulder_px = w2s(*shoulder, cam_x)
    elbow_px = w2s(*elbow, cam_x)
    foot_px = w2s(*foot, cam_x)
    pygame.draw.line(screen, C['seg1'], shoulder_px, elbow_px, 6)
    pygame.draw.line(screen, C['seg2'], elbow_px, foot_px, 5)
    pygame.draw.circle(screen, C['joint'], shoulder_px, 7)
    pygame.draw.circle(screen, C['joint'], elbow_px, 6)

    if env.planted:
        foot_colour = C['planted']
        foot_text = 'PLANTED [green dot]'
    else:
        foot_colour = C['foot']
        foot_text = 'lifted [red dot]'
    pygame.draw.circle(screen, foot_colour, foot_px, 7)
    if env.planted:
        # thin line from the foot marker down/up to the floor line
        pygame.draw.line(screen, C['planted'], foot_px, (foot_px[0], FLOOR_Y), 1)

    # ── HUD ──
    action_names = {0: "0 theta1+ (W)", 1: "1 theta1- (S)",
                    2: "2 theta2+ (D)", 3: "3 theta2- (A)"}
    action_str = action_names.get(env.last_action, "—")

    lines = [
        (f"last action : {action_str}", C['text']),
        (f"theta1 (horiz) : {env.theta1:+6.1f} deg [{T1_MIN:+.0f}, {T1_MAX:+.0f}]", C['text']),
        (f"theta2 (vert) : {env.theta2:+6.1f} deg [{T2_MIN:+.0f}, {T2_MAX:+.0f}]", C['text']),
        (f"position : {env.body_x:+.0f}", C['pos']),
        (f"total distance : {env.dist:+.1f}", C['pos']),
        (f"foot : {foot_text}", foot_colour),
    ]
    if bottom_hints:
        lines.append(("", C['text']))
        lines.extend((hint, C['dim']) for hint in bottom_hints)

    y = 10
    for text, colour in lines:
        screen.blit(font.render(text, True, colour), (12, y))
        y += 23

    pygame.display.flip()
| # ── state-space trajectory plot ─────────────────────────────────────────────── | |
def plot_trajectory(trajectory, title="State trajectory (θ1 × θ2)"):
    """
    Plot the path of states visited in the θ1 × θ2 space.

    Parameters
    ----------
    trajectory : iterable of (obs, action, reward, info) tuples
        As returned by get_rollout() in random_policy.py.  Only ``obs[0]``
        and ``obs[1]`` of each entry are used.  May be empty, in which case
        only the background grid is drawn.
    title : str
        Window / figure title.

    The path is drawn as a colour-gradient line (blue → red over time) with
    small arrows showing direction of travel. Start is marked with a green
    circle, end with a red square. All valid grid points are shown as faint
    grey dots in the background.  Blocks in ``plt.show()`` until the figure
    is closed.
    """
    import matplotlib.pyplot as plt
    import matplotlib.collections as mc

    states = CrawlerEnv.all_states()
    grid_t1 = [s[0] for s in states]
    grid_t2 = [s[1] for s in states]

    # Read the trajectory in a single pass so a one-shot iterable works too.
    visited = [(int(obs[0]), int(obs[1])) for obs, *_ in trajectory]
    t1s = [p[0] for p in visited]
    t2s = [p[1] for p in visited]
    n = len(visited)
    # Index of the last state, floored at 1 to avoid dividing by zero.
    last = max(n - 1, 1)

    fig, ax = plt.subplots(figsize=(8, 6))
    fig.suptitle(title)

    # faint grid dots for all valid states
    ax.scatter(grid_t1, grid_t2, s=12, color="lightgrey", zorder=1, label="valid states")

    # colour-gradient path: segments coloured blue→red by time
    cmap = plt.get_cmap("coolwarm")
    colors = [cmap(i / last) for i in range(n - 1)]
    segments = [[a, b] for a, b in zip(visited, visited[1:])]
    lc = mc.LineCollection(segments, colors=colors, linewidths=1.5, zorder=2)
    ax.add_collection(lc)

    # roughly 20 evenly spaced arrows to show direction
    stride = max(1, n // 20)
    for i in range(0, n - 1, stride):
        # skip steps where the state did not change (action hit a limit)
        if visited[i + 1] != visited[i]:
            ax.annotate("", xy=visited[i + 1], xytext=visited[i],
                        arrowprops=dict(arrowstyle="->", color=cmap(i / last),
                                        lw=1.2), zorder=3)

    # start / end markers – only when there is at least one state to mark
    if n:
        ax.scatter([t1s[0]], [t2s[0]], s=80, color="green", marker="o",
                   zorder=4, label="start")
        ax.scatter([t1s[-1]], [t2s[-1]], s=80, color="red", marker="s",
                   zorder=4, label="end")

    ax.set_xlabel("θ1 (horizontal arm, deg)")
    ax.set_ylabel("θ2 (vertical arm, deg)")
    ax.set_xlim(T1_MIN - 5, T1_MAX + 5)
    ax.set_ylim(T2_MIN - 5, T2_MAX + 5)
    ax.set_xticks(range(int(T1_MIN), int(T1_MAX) + 1, int(STEP) * 3))
    ax.set_yticks(range(int(T2_MIN), int(T2_MAX) + 1, int(STEP) * 3))
    ax.grid(True, linestyle=":", linewidth=0.5, alpha=0.4)
    ax.legend(loc="upper right", fontsize=8)

    # colour bar showing time progression; normalised over 0 … n-1 so that a
    # bar value of i has the same colour as path segment i
    sm = plt.cm.ScalarMappable(cmap=cmap, norm=plt.Normalize(0, last))
    sm.set_array([])
    fig.colorbar(sm, ax=ax, label="step")

    plt.tight_layout()
    plt.show()
| # ── human-interactive main ───────────────────────────────────────────────────── | |
def main():
    """Run the crawler in human-interactive mode (WASD / arrow keys).

    Opens its own pygame window, prints the module docstring and a per-step
    log line to stdout, and loops until the window is closed or Esc is
    pressed, at which point the process exits via ``sys.exit()``.
    """
    import pygame

    def shutdown():
        # Tear pygame down, then leave the process.
        pygame.quit()
        sys.exit()

    pygame.init()
    screen = pygame.display.set_mode((SW, SH))
    pygame.display.set_caption("Crawler RL Environment – Human Play")
    clock = pygame.time.Clock()
    font = pygame.font.SysFont("monospace", 16)
    sfont = pygame.font.SysFont("monospace", 11)

    # The environment stays headless – this loop owns the rendering.
    env = CrawlerEnv(render=False)
    obs, _ = env.reset()
    cam_x = float(env.body_x + BODY_W / 2)

    # keyboard key → discrete action
    key_to_action = {
        pygame.K_w: 0, pygame.K_UP: 0,
        pygame.K_s: 1, pygame.K_DOWN: 1,
        pygame.K_d: 2, pygame.K_RIGHT: 2,
        pygame.K_a: 3, pygame.K_LEFT: 3,
    }
    controls = [
        "W/↑ theta1+ S/↓ theta1-",
        "A/← theta2- D/→ theta2+",
        "R reset Esc quit",
    ]

    print(__doc__)
    print("─" * 56)

    while True:
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                shutdown()
            if event.type != pygame.KEYDOWN:
                continue

            key = event.key
            if key == pygame.K_ESCAPE:
                shutdown()
            if key == pygame.K_r:
                obs, _ = env.reset()
                cam_x = float(env.body_x + BODY_W / 2)
                print(" [reset]")
                continue

            action = key_to_action.get(key)
            if action is None:
                continue

            obs, rew, _, _, info = env.step(action)
            if rew > 0.05:
                arrow = "→"
            elif rew < -0.05:
                arrow = "←"
            else:
                arrow = "·"
            planted_flag = 'Y' if info['planted'] else 'n'
            line = (f" step {env.n_steps:4d} "
                    f"θ1={obs[0]:+6.1f}° θ2={obs[1]:+7.1f}° "
                    f"reward={rew:+5.1f} {arrow} "
                    f"planted={planted_flag} "
                    f"total={info['dist']:+.1f}")
            print(line)

        # ease the camera towards the body centre, then draw and throttle
        cam_x += (env.body_x + BODY_W / 2 - cam_x) * 0.12
        _draw_frame(screen, env, font, sfont, cam_x, bottom_hints=controls)
        clock.tick(HUMAN_FPS)


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment