Skip to content

Instantly share code, notes, and snippets.

@ArztSamuel
Last active November 13, 2024 06:14
Show Gist options
  • Select an option

  • Save ArztSamuel/499e617844ca4ce6e222183bd23752f0 to your computer and use it in GitHub Desktop.

Select an option

Save ArztSamuel/499e617844ca4ce6e222183bd23752f0 to your computer and use it in GitHub Desktop.

Revisions

  1. ArztSamuel revised this gist Jan 5, 2020. 1 changed file with 3 additions and 3 deletions.
    6 changes: 3 additions & 3 deletions ParkingCarAgent.cs
    Original file line number Diff line number Diff line change
    @@ -58,9 +58,9 @@ public override void AgentAction(float[] vectorAction, string textAction)
    return;

    // Action Inputs, length 3:
    // [0]: Throttle, remapped to range [0, 1]
    // [1]: Turning
    // [2]: Braking, remapped to range [0, 1]
    // [0]: Throttle, positive remapped to range [0, 1]
    // [0]: Braking, negative remapped to range [0, 1]
    // [1]: Turning, directly used as input

    carPhysics.CurrentThrottle = Mathf.Max(0, vectorAction[0]);
    carPhysics.CurrentBraking = Mathf.Max(0, -vectorAction[0]);
  2. ArztSamuel revised this gist Sep 26, 2019. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion ParkingCarAgent.cs
    Original file line number Diff line number Diff line change
    @@ -57,7 +57,7 @@ public override void AgentAction(float[] vectorAction, string textAction)
    if (IsDone())
    return;

    // Action Inputs, length 4:
    // Action Inputs, length 3:
    // [0]: Throttle, remapped to range [0, 1]
    // [1]: Turning
    // [2]: Braking, remapped to range [0, 1]
  3. ArztSamuel revised this gist Sep 6, 2019. 1 changed file with 118 additions and 118 deletions.
    236 changes: 118 additions & 118 deletions ParkingCarAgent.cs
    Original file line number Diff line number Diff line change
    @@ -4,151 +4,151 @@ public class ParkingCarAgent : Agent
    private Transform TargetParkingSpot;

    [SerializeField]
    // = Reward every 'interval' units getting closer
    private float DistanceRewardInterval = 3f;
    // = Reward every 'interval' units getting closer
    private float DistanceRewardInterval = 3f;

    // Thresholds defining when the task is complete
    [SerializeField]
    private float DistanceThreshold = 2;
    [SerializeField]
    private float RotationThreshold = 20;
    [SerializeField]
    private float SpeedTheshold = 5f;
    private float DistanceThreshold = 2;
    [SerializeField]
    private float RotationThreshold = 20;
    [SerializeField]
    private float SpeedTheshold = 5f;

    // Bounds the agent may not leave
    [SerializeField]
    private Bounds AllowedBounds;
    private Bounds AllowedBounds;

    private DistanceSensor[] distanceSensors;

    ...

    public override void CollectObservations()
    {
    base.CollectObservations();

    // Agent position, y rotation and velocity
    Vector3 normalizedAgentPosition = GetNormalizedPosition(this.transform.position);
    AddVectorObs(carPhysics.CurrentSpeed);
    AddVectorObs(normalizedAgentPosition.x);
    AddVectorObs(normalizedAgentPosition.z);
    Vector3 normalizedAgentRotation = GetNormalizedRotation(this.transform.rotation);
    AddVectorObs(normalizedAgentRotation.y);

    // Target position / y rotation
    Vector3 normalizedTargetPosition = GetNormalizedPosition(TargetParkingSpot.position);
    AddVectorObs(normalizedTargetPosition.x - normalizedAgentPosition.x);
    AddVectorObs(normalizedTargetPosition.z - normalizedAgentPosition.z);
    Vector3 normalizedTargetRotation = GetNormalizedRotation(TargetParkingSpot.rotation);
    AddVectorObs(normalizedTargetRotation.y - normalizedAgentRotation.y);

    // Add all sensor readings
    foreach (DistanceSensor sensor in distanceSensors)
    {
    sensor.UpdateSensorReadings();
    AddVectorObs(sensor.NormalizedDistance);
    }
    }
    {
    base.CollectObservations();

    // Agent position, y rotation and velocity
    Vector3 normalizedAgentPosition = GetNormalizedPosition(this.transform.position);
    AddVectorObs(carPhysics.CurrentSpeed);
    AddVectorObs(normalizedAgentPosition.x);
    AddVectorObs(normalizedAgentPosition.z);
    Vector3 normalizedAgentRotation = GetNormalizedRotation(this.transform.rotation);
    AddVectorObs(normalizedAgentRotation.y);

    // Target position / y rotation
    Vector3 normalizedTargetPosition = GetNormalizedPosition(TargetParkingSpot.position);
    AddVectorObs(normalizedTargetPosition.x - normalizedAgentPosition.x);
    AddVectorObs(normalizedTargetPosition.z - normalizedAgentPosition.z);
    Vector3 normalizedTargetRotation = GetNormalizedRotation(TargetParkingSpot.rotation);
    AddVectorObs(normalizedTargetRotation.y - normalizedAgentRotation.y);

    // Add all sensor readings
    foreach (DistanceSensor sensor in distanceSensors)
    {
    sensor.UpdateSensorReadings();
    AddVectorObs(sensor.NormalizedDistance);
    }
    }

    public override void AgentAction(float[] vectorAction, string textAction)
    {
    base.AgentAction(vectorAction, textAction);

    if (IsDone())
    return;

    // Action Inputs, length 4:
    // [0]: Throttle, remapped to range [0, 1]
    // [1]: Turning
    // [2]: Braking, remapped to range [0, 1]

    carPhysics.CurrentThrottle = Mathf.Max(0, vectorAction[0]);
    carPhysics.CurrentBraking = Mathf.Max(0, -vectorAction[0]);
    carPhysics.CurrentTurning = vectorAction[1];

    // Reward for getting closer; Note: could use sqrDistance here for performance
    float distanceToTarget = Vector3.Distance(this.transform.position, TargetParkingSpot.transform.position);
    if (distanceToTarget < previousDistance)
    {
    if ((int)(distanceToTarget / DistanceRewardInterval) < (int)(previousDistance / DistanceRewardInterval))
    AddReward(0.02f);

    previousDistance = distanceToTarget;
    }
    else
    {
    {
    base.AgentAction(vectorAction, textAction);

    if (IsDone())
    return;

    // Action Inputs, length 4:
    // [0]: Throttle, remapped to range [0, 1]
    // [1]: Turning
    // [2]: Braking, remapped to range [0, 1]

    carPhysics.CurrentThrottle = Mathf.Max(0, vectorAction[0]);
    carPhysics.CurrentBraking = Mathf.Max(0, -vectorAction[0]);
    carPhysics.CurrentTurning = vectorAction[1];

    // Reward for getting closer; Note: could use sqrDistance here for performance
    float distanceToTarget = Vector3.Distance(this.transform.position, TargetParkingSpot.transform.position);
    if (distanceToTarget < previousDistance)
    {
    if ((int)(distanceToTarget / DistanceRewardInterval) < (int)(previousDistance / DistanceRewardInterval))
    AddReward(0.02f);

    previousDistance = distanceToTarget;
    }
    else
    {
    // Note: '* 2' is a hard coded value here, which I introduced after tuning the penalty to occur less frequently than
    // the reward, in order to not 'scare' the AI of performing corrective maneuvers where it has to first increase the
    // distance to the target parking spot.
    if ((int)(distanceToTarget / (DistanceRewardInterval * 2)) > (int)(previousDistance / (DistanceRewardInterval * 2)))
    {
    if (Verbose)
    Debug.Log("Distance based penalty");
    AddReward(-0.04f);
    if ((int)(distanceToTarget / (DistanceRewardInterval * 2)) > (int)(previousDistance / (DistanceRewardInterval * 2)))
    {
    if (Verbose)
    Debug.Log("Distance based penalty");
    AddReward(-0.04f);

    previousDistance = distanceToTarget;
    }
    }
    previousDistance = distanceToTarget;
    }
    }

    // Check task completion (= position and rotation lower than threshold)
    float rotationDiff = Quaternion.Angle(this.transform.rotation, TargetParkingSpot.rotation);
    // Check task completion (= position and rotation lower than threshold)
    float rotationDiff = Quaternion.Angle(this.transform.rotation, TargetParkingSpot.rotation);

    if (distanceToTarget <= DistanceThreshold)
    {
    if (distanceToTarget <= DistanceThreshold)
    {
    // Angle wrap-around
    if (rotationDiff > 90)
    rotationDiff = 180 - rotationDiff;
    if (rotationDiff > 90)
    rotationDiff = 180 - rotationDiff;

    if (Mathf.Abs(carPhysics.CurrentSpeed) <= SpeedTheshold)
    {
    if (Mathf.Abs(carPhysics.CurrentSpeed) <= SpeedTheshold)
    {
    // Determine how well (= how parallel) the AI parked
    float reward = 1;
    if (rotationDiff > RotationThreshold)
    reward = 1 - GetNormalizedValue(rotationDiff, RotationThreshold, 90);
    float reward = 1;
    if (rotationDiff > RotationThreshold)
    reward = 1 - GetNormalizedValue(rotationDiff, RotationThreshold, 90);

    AddReward(reward);
    Done();
    AddReward(reward);
    Done();

    return;
    }
    }

    if (!AllowedBounds.Contains(new Vector3Int((int)transform.position.x, (int)transform.position.y, (int)transform.position.z)))
    {
    AddReward(-1.0f);
    Done();
    return;
    }
    }
    return;
    }
    }

    if (!AllowedBounds.Contains(new Vector3Int((int)transform.position.x, (int)transform.position.y, (int)transform.position.z)))
    {
    AddReward(-1.0f);
    Done();
    return;
    }
    }

    private Vector3 GetNormalizedPosition(in Vector3 position)
    {
    float normalizedX = GetNormalizedValue(position.x, AllowedBounds.min.x, AllowedBounds.max.x);
    float normalizedY = GetNormalizedValue(position.y, AllowedBounds.min.y, AllowedBounds.max.y);
    float normalizedZ = GetNormalizedValue(position.z, AllowedBounds.min.z, AllowedBounds.max.z);

    return new Vector3(normalizedX, normalizedY, normalizedZ);
    }

    private Vector3 GetNormalizedRotation(in Quaternion rotation)
    {
    float normalizedX = GetNormalizedValue(rotation.eulerAngles.x, 0, 360);
    float normalizedY = GetNormalizedValue(rotation.eulerAngles.y, 0, 360);
    float normalizedZ = GetNormalizedValue(rotation.eulerAngles.z, 0, 360);

    return new Vector3(normalizedX, normalizedY, normalizedZ);
    }

    private float GetNormalizedValue(float currentValue, float minValue, float maxValue)
    {
    return (currentValue - minValue) / (maxValue - minValue);
    }

    void OnCollisionEnter(Collision collision)
    {
    if (collision.collider.gameObject.GetComponent<Knockable>() || collision.collider.gameObject.GetComponentInParent<ParkingCar>())
    AddReward(-0.12f);
    }
    {
    float normalizedX = GetNormalizedValue(position.x, AllowedBounds.min.x, AllowedBounds.max.x);
    float normalizedY = GetNormalizedValue(position.y, AllowedBounds.min.y, AllowedBounds.max.y);
    float normalizedZ = GetNormalizedValue(position.z, AllowedBounds.min.z, AllowedBounds.max.z);

    return new Vector3(normalizedX, normalizedY, normalizedZ);
    }

    private Vector3 GetNormalizedRotation(in Quaternion rotation)
    {
    float normalizedX = GetNormalizedValue(rotation.eulerAngles.x, 0, 360);
    float normalizedY = GetNormalizedValue(rotation.eulerAngles.y, 0, 360);
    float normalizedZ = GetNormalizedValue(rotation.eulerAngles.z, 0, 360);

    return new Vector3(normalizedX, normalizedY, normalizedZ);
    }

    private float GetNormalizedValue(float currentValue, float minValue, float maxValue)
    {
    return (currentValue - minValue) / (maxValue - minValue);
    }

    void OnCollisionEnter(Collision collision)
    {
    if (collision.collider.gameObject.GetComponent<Knockable>() || collision.collider.gameObject.GetComponentInParent<ParkingCar>())
    AddReward(-0.12f);
    }

    ...
    }
  4. ArztSamuel revised this gist Sep 6, 2019. No changes.
  5. ArztSamuel revised this gist Sep 6, 2019. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion ParkingCarAgent.cs
    Original file line number Diff line number Diff line change
    @@ -1,7 +1,7 @@
    public class ParkingCarAgent : Agent
    {
    [SerializeField]
    private Transform TargetParkingSpot;
    private Transform TargetParkingSpot;

    [SerializeField]
    // = Reward every 'interval' units getting closer
  6. ArztSamuel created this gist Sep 5, 2019.
    154 changes: 154 additions & 0 deletions ParkingCarAgent.cs
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,154 @@
    public class ParkingCarAgent : Agent
    {
    // Parking spot the agent has to reach. Its position and rotation are read in
    // CollectObservations (relative target observations) and in AgentAction
    // (distance reward and completion check).
    [SerializeField]
    private Transform TargetParkingSpot;

    [SerializeField]
    // Width of the distance bands used for reward shaping in AgentAction: a small reward is
    // added when the distance to the target drops into a lower band, and a penalty is added
    // when it rises into a higher band of twice this width.
    private float DistanceRewardInterval = 3f;

    // Thresholds defining when the task is complete (all evaluated in AgentAction)
    // Maximum distance to the target at which completion is evaluated.
    [SerializeField]
    private float DistanceThreshold = 2;
    // Angle difference (as returned by Quaternion.Angle, folded to at most 90) up to which the
    // full completion reward is given; above it the reward is scaled down towards 90.
    [SerializeField]
    private float RotationThreshold = 20;
    // Maximum absolute carPhysics.CurrentSpeed at which the car counts as parked.
    // NOTE(review): the name is misspelled ("Theshold"); it is left unchanged because renaming a
    // serialized field presumably loses values stored in scenes/prefabs - confirm before renaming.
    [SerializeField]
    private float SpeedTheshold = 5f;

    // Bounds the agent may not leave: failing the containment test in AgentAction ends the
    // episode with a -1 reward. Also used as the min/max range in GetNormalizedPosition.
    [SerializeField]
    private Bounds AllowedBounds;

    // Sensors whose NormalizedDistance readings are appended to the observations in
    // CollectObservations. Not serialized; presumably assigned in code that is not shown here.
    private DistanceSensor[] distanceSensors;

    ...

    /// <summary>
    /// Builds the observation vector: agent speed, normalized agent x/z position and y rotation,
    /// the target's normalized x/z position and y rotation relative to the agent, followed by
    /// one normalized reading per distance sensor. The order of the entries is significant.
    /// </summary>
    public override void CollectObservations()
    {
        base.CollectObservations();

        // Agent state: speed first, then planar position and heading.
        Vector3 agentPos = GetNormalizedPosition(this.transform.position);
        Vector3 agentRot = GetNormalizedRotation(this.transform.rotation);
        AddVectorObs(carPhysics.CurrentSpeed);
        AddVectorObs(agentPos.x);
        AddVectorObs(agentPos.z);
        AddVectorObs(agentRot.y);

        // Target state, expressed as an offset from the agent's normalized values.
        Vector3 targetPos = GetNormalizedPosition(TargetParkingSpot.position);
        Vector3 targetRot = GetNormalizedRotation(TargetParkingSpot.rotation);
        AddVectorObs(targetPos.x - agentPos.x);
        AddVectorObs(targetPos.z - agentPos.z);
        AddVectorObs(targetRot.y - agentRot.y);

        // Refresh each sensor before reading it so the observation reflects the current step.
        for (int i = 0; i < distanceSensors.Length; i++)
        {
            DistanceSensor current = distanceSensors[i];
            current.UpdateSensorReadings();
            AddVectorObs(current.NormalizedDistance);
        }
    }

    /// <summary>
    /// Applies the received action to the car, hands out distance-based shaping rewards and
    /// ends the episode once the car is parked or fails the AllowedBounds containment test.
    /// </summary>
    /// <param name="vectorAction">Action values; only indices 0 and 1 are read.</param>
    /// <param name="textAction">Forwarded to the base implementation; not used here.</param>
    public override void AgentAction(float[] vectorAction, string textAction)
    {
        base.AgentAction(vectorAction, textAction);

        if (IsDone())
        {
            return;
        }

        // Action inputs, length 2:
        // [0]: Throttle and braking share one axis - the positive part drives the throttle,
        //      the negated negative part drives the brake, so both values are >= 0
        // [1]: Turning, used directly as input
        carPhysics.CurrentThrottle = Mathf.Max(0, vectorAction[0]);
        carPhysics.CurrentBraking = Mathf.Max(0, -vectorAction[0]);
        carPhysics.CurrentTurning = vectorAction[1];

        // Reward for getting closer; Note: could use sqrDistance here for performance
        float distanceToTarget = Vector3.Distance(this.transform.position, TargetParkingSpot.transform.position);
        if (distanceToTarget < previousDistance)
        {
            // Reward only when the distance crosses into a lower band of width DistanceRewardInterval.
            if ((int)(distanceToTarget / DistanceRewardInterval) < (int)(previousDistance / DistanceRewardInterval))
            {
                AddReward(0.02f);
            }

            previousDistance = distanceToTarget;
        }
        else
        {
            // Note: '* 2' is a hard coded value here, introduced after tuning the penalty to occur less
            // frequently than the reward, in order to not 'scare' the AI of performing corrective maneuvers
            // where it has to first increase the distance to the target parking spot.
            // previousDistance is only moved up when the penalty actually fires.
            float penaltyInterval = DistanceRewardInterval * 2;
            if ((int)(distanceToTarget / penaltyInterval) > (int)(previousDistance / penaltyInterval))
            {
                if (Verbose)
                {
                    Debug.Log("Distance based penalty");
                }

                AddReward(-0.04f);
                previousDistance = distanceToTarget;
            }
        }

        // Check task completion (= close enough to the target and slow enough)
        if (distanceToTarget <= DistanceThreshold && Mathf.Abs(carPhysics.CurrentSpeed) <= SpeedTheshold)
        {
            // Only needed on this path, so it is computed here instead of on every step.
            float rotationDiff = Quaternion.Angle(this.transform.rotation, TargetParkingSpot.rotation);

            // Angle wrap-around: fold differences above 90 back so that facing either way
            // along the parking spot is treated the same.
            if (rotationDiff > 90)
            {
                rotationDiff = 180 - rotationDiff;
            }

            // Determine how well (= how parallel) the AI parked
            float reward = 1;
            if (rotationDiff > RotationThreshold)
            {
                reward = 1 - GetNormalizedValue(rotationDiff, RotationThreshold, 90);
            }

            AddReward(reward);
            Done();
            return;
        }

        // NOTE(review): the position is truncated to whole units before the containment test, so
        // the result can be off by up to one unit per axis compared to testing transform.position
        // directly. Kept unchanged because existing scenes may be tuned against it - confirm
        // whether the truncation is intended.
        if (!AllowedBounds.Contains(new Vector3Int((int)transform.position.x, (int)transform.position.y, (int)transform.position.z)))
        {
            AddReward(-1.0f);
            Done();
        }
    }

    /// <summary>
    /// Maps a position to per-axis values relative to AllowedBounds, where the bounds' min
    /// corresponds to 0 and its max to 1 on each axis.
    /// </summary>
    /// <param name="position">Position to normalize.</param>
    /// <returns>Vector holding the normalized x, y and z components.</returns>
    private Vector3 GetNormalizedPosition(in Vector3 position)
    {
        Vector3 lower = AllowedBounds.min;
        Vector3 upper = AllowedBounds.max;

        return new Vector3(
            GetNormalizedValue(position.x, lower.x, upper.x),
            GetNormalizedValue(position.y, lower.y, upper.y),
            GetNormalizedValue(position.z, lower.z, upper.z));
    }

    /// <summary>
    /// Converts a rotation to its Euler angles and scales each component from the range
    /// 0..360 down to 0..1.
    /// </summary>
    /// <param name="rotation">Rotation to normalize.</param>
    /// <returns>Vector holding the normalized x, y and z Euler angles.</returns>
    private Vector3 GetNormalizedRotation(in Quaternion rotation)
    {
        // Read the Euler representation once and reuse it for all three axes.
        Vector3 euler = rotation.eulerAngles;

        return new Vector3(
            GetNormalizedValue(euler.x, 0, 360),
            GetNormalizedValue(euler.y, 0, 360),
            GetNormalizedValue(euler.z, 0, 360));
    }

    /// <summary>
    /// Linearly maps <paramref name="currentValue"/> so that <paramref name="minValue"/> becomes 0
    /// and <paramref name="maxValue"/> becomes 1. Values outside the range are not clamped.
    /// </summary>
    /// <param name="currentValue">Value to normalize.</param>
    /// <param name="minValue">Value that maps to 0.</param>
    /// <param name="maxValue">Value that maps to 1.</param>
    /// <returns>
    /// The normalized value, or 0 when <paramref name="minValue"/> equals
    /// <paramref name="maxValue"/> (empty range).
    /// </returns>
    private float GetNormalizedValue(float currentValue, float minValue, float maxValue)
    {
        float range = maxValue - minValue;

        // An empty range would divide by zero and produce NaN/Infinity, which would then leak
        // into observations and rewards; report 0 instead.
        if (range == 0f)
        {
            return 0f;
        }

        return (currentValue - minValue) / range;
    }

    /// <summary>
    /// Collision callback: adds a -0.12 reward when the object hit carries a Knockable
    /// component or has a ParkingCar component on itself or one of its parents.
    /// </summary>
    /// <param name="collision">Collision data of the contact that just started.</param>
    void OnCollisionEnter(Collision collision)
    {
        GameObject struck = collision.collider.gameObject;

        bool isPenalizedObstacle = struck.GetComponent<Knockable>() || struck.GetComponentInParent<ParkingCar>();
        if (!isPenalizedObstacle)
        {
            return;
        }

        AddReward(-0.12f);
    }

    ...
    }