Skip to content

Instantly share code, notes, and snippets.

@araffin
Last active September 2, 2025 13:09
Show Gist options
  • Select an option

  • Save araffin/1fb77a8f290ac248b2e76e01164f21e0 to your computer and use it in GitHub Desktop.

Select an option

Save araffin/1fb77a8f290ac248b2e76e01164f21e0 to your computer and use it in GitHub Desktop.

Revisions

  1. araffin revised this gist Jun 10, 2025. 1 changed file with 1 addition and 0 deletions.
    1 change: 1 addition & 0 deletions halfcheetah_minimal.py
    Original file line number Diff line number Diff line change
    @@ -1,3 +1,4 @@
    # MIT License Copyright (c) 2024 Antonin Raffin
    import gymnasium as gym
    import numpy as np
    from gymnasium.envs.mujoco.mujoco_env import MujocoEnv
  2. araffin revised this gist Feb 29, 2024. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion halfcheetah_minimal.py
    Original file line number Diff line number Diff line change
    @@ -45,7 +45,7 @@
    t += mujoco_env.dt

    if terminated or truncated:
    print(f"Episode return: {float(info['episode']['r']):.2f}")
    print(f"Episode return: {float(info['episode']['r'].item()):.2f}")
    t, _ = 0.0, env.reset()
    # Reinitialize
    theta = phase_shifts.copy()
  3. araffin revised this gist Feb 20, 2024. 1 changed file with 5 additions and 5 deletions.
    10 changes: 5 additions & 5 deletions halfcheetah_minimal.py
    Original file line number Diff line number Diff line change
    @@ -14,11 +14,11 @@
    # Reset the environment
    t, _ = 0.0, env.reset(seed=0)
    # Oscillators parameters
    omega_stance = 2 * np.pi * 4.62 * np.ones(n_joints)
    omega_swing = 2 * np.pi * 3.86 * np.ones(n_joints)
    phase_shifts = 2 * np.pi * np.array([0.00, 0.79, 0.32, 0.29, 0.62, 0.92])
    amplitudes = np.array([1.12, -1.91, -1.2, 1.17, 1.20, -0.08])
    offsets = np.array([-0.11, 0.08, 0.0, -0.49, -0.5, -0.23])
    omega_stance = 2 * np.pi * 4.622 * np.ones(n_joints)
    omega_swing = 2 * np.pi * 3.865 * np.ones(n_joints)
    phase_shifts = 2 * np.pi * np.array([0.00, 0.789, 0.316, 0.294, 0.629, 0.921])
    amplitudes = np.array([1.123, -1.91, -1.204, 1.173, 1.196, -0.085])
    offsets = np.array([-0.114, 0.075, 0.002, -0.493, -0.501, -0.227])
    oscillator_dt = 0.001 # 1kHz, integration step
    # Initial joint positions
    theta = phase_shifts.copy()
  4. araffin revised this gist Feb 20, 2024. 1 changed file with 2 additions and 1 deletion.
    3 changes: 2 additions & 1 deletion halfcheetah_minimal.py
    Original file line number Diff line number Diff line change
    @@ -17,10 +17,11 @@
    omega_stance = 2 * np.pi * 4.62 * np.ones(n_joints)
    omega_swing = 2 * np.pi * 3.86 * np.ones(n_joints)
    phase_shifts = 2 * np.pi * np.array([0.00, 0.79, 0.32, 0.29, 0.62, 0.92])
    theta = phase_shifts.copy()
    amplitudes = np.array([1.12, -1.91, -1.2, 1.17, 1.20, -0.08])
    offsets = np.array([-0.11, 0.08, 0.0, -0.49, -0.5, -0.23])
    oscillator_dt = 0.001 # 1kHz, integration step
    # Initial joint positions
    theta = phase_shifts.copy()

    while True:
    env.render()
  5. araffin created this gist Feb 20, 2024.
    50 changes: 50 additions & 0 deletions halfcheetah_minimal.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,50 @@
    import gymnasium as gym
    import numpy as np
    from gymnasium.envs.mujoco.mujoco_env import MujocoEnv

    # Minimal open-loop controller for HalfCheetah: one phase oscillator per joint
    # produces a desired joint position, which a clipped PD law turns into the action.

    # Env initialization
    env = gym.make("HalfCheetah-v4", render_mode="human")
    # Wrap to have reward statistics (adds info["episode"] at the end of an episode)
    env = gym.wrappers.RecordEpisodeStatistics(env)
    mujoco_env = env.unwrapped
    n_joints = 6
    assert isinstance(mujoco_env, MujocoEnv)
    # PD Controller gains
    kp, kd = 1.0, 0.05
    # Reset the environment
    t, _ = 0.0, env.reset(seed=0)
    # Oscillator parameters: angular frequency used while sin(theta) <= 0 (stance)
    # and while sin(theta) > 0 (swing), plus per-joint phase, amplitude and offset
    omega_stance = 2 * np.pi * 4.62 * np.ones(n_joints)
    omega_swing = 2 * np.pi * 3.86 * np.ones(n_joints)
    phase_shifts = 2 * np.pi * np.array([0.00, 0.79, 0.32, 0.29, 0.62, 0.92])
    # Initial oscillator phases
    theta = phase_shifts.copy()
    amplitudes = np.array([1.12, -1.91, -1.2, 1.17, 1.20, -0.08])
    offsets = np.array([-0.11, 0.08, 0.0, -0.49, -0.5, -0.23])
    oscillator_dt = 0.001  # 1kHz, integration step
    # Oscillator sub-steps per control step. Computed once (loop-invariant) and
    # rounded instead of truncated: a float quotient landing just below an
    # integer would otherwise lose one integration step.
    n_substeps = int(round(mujoco_env.dt / oscillator_dt))

    try:
        while True:
            env.render()
            # Integrate oscillators equations (explicit Euler)
            for _ in range(n_substeps):
                in_swing_phase = np.sin(theta) > 0
                theta_dot = in_swing_phase * omega_swing + (1 - in_swing_phase) * omega_stance
                # Integrate and keep theta in [0, 2 * pi]
                theta = (theta + oscillator_dt * theta_dot) % (2 * np.pi)

            # Open-Loop Control using oscillators
            desired_qpos = amplitudes * np.sin(theta) + offsets

            # PD Control: desired qvel is zero
            # NOTE(review): assumes the last n_joints entries of qpos/qvel are the
            # actuated joints, in the same order as the action vector -- confirm.
            desired_torques = (
                kp * (desired_qpos - mujoco_env.data.qpos[-n_joints:])
                - kd * mujoco_env.data.qvel[-n_joints:]
            )
            desired_torques = np.clip(desired_torques, -1.0, 1.0)  # clip to action bounds
            _, reward, terminated, truncated, info = env.step(desired_torques)
            t += mujoco_env.dt

            if terminated or truncated:
                # Normalise through asarray(...).item() so the value converts to a
                # Python float whether it arrives as a float, a NumPy scalar or a
                # size-1 array (float() on a size-1 array is deprecated in NumPy).
                episode_return = float(np.asarray(info["episode"]["r"]).item())
                print(f"Episode return: {episode_return:.2f}")
                t, _ = 0.0, env.reset()
                # Reinitialize
                theta = phase_shifts.copy()
    finally:
        # Release the viewer/simulator however the loop is left (Ctrl-C, error)
        env.close()