Skip to content

Instantly share code, notes, and snippets.

@dennislwm
Forked from vivekhaldar/color_edit.py
Created June 13, 2021 03:51
Show Gist options
  • Select an option

  • Save dennislwm/230b05d0d40196a30975010e0ae08339 to your computer and use it in GitHub Desktop.

Select an option

Save dennislwm/230b05d0d40196a30975010e0ae08339 to your computer and use it in GitHub Desktop.

Revisions

  1. @vivekhaldar vivekhaldar revised this gist Sep 4, 2020. 1 changed file with 7 additions and 5 deletions.
    12 changes: 7 additions & 5 deletions color_edit.py
    Original file line number Diff line number Diff line change
    @@ -27,7 +27,7 @@ def avg_rgb(frame, x1, y1, x2, y2):
    # Returns list of (start, end) tuples of time intervals we want to keep.
    def color_edit(video):
    intervals_to_keep = []
    frame_marker = [] # 'c': content; 'y': keep prior interval; 'n'
    frame_marker = [] # 'c': content; 'y': keep prior interval; 'n': drop prior interval.
    # Iterate over every frame.
    for frame in video.iter_frames():
    avg_r, avg_g, avg_b = avg_rgb(frame, 100, 100, 110, 110)
    @@ -39,16 +39,19 @@ def color_edit(video):
    elif is_green:
    marker = 'y'
    frame_marker.append(marker)
    #print(frame_marker)

    keep_start, keep_end = 0, 0
    keep_intervals = []
    start_of_last_green = 0
    for i in range(1, len(frame_marker)):
    m1 = frame_marker[i - 1]
    m2 = frame_marker[i]
    # Content followed by green, take note.
    if m1 == 'c' and m2 == 'y':
    start_of_last_green = i
    # Green followed by content. Keep previous interval. Start a (possible) new interval.
    if m1 == 'y' and m2 == 'c':
    keep_end = i / video.fps
    keep_end = start_of_last_green / video.fps
    keep_intervals.append([keep_start, keep_end])
    keep_start = (i + 1) / video.fps
    # Red followed by content. Drop the previous interval. Start a (possible) new interval.
    @@ -61,7 +64,6 @@ def color_edit(video):
    keep_end = i / video.fps
    keep_intervals.append([keep_start, keep_end])


    return keep_intervals


    @@ -71,7 +73,7 @@ def color_edit(video):
    # window_size: (in seconds) hunt for silence in windows of this size
    # volume_threshold: volume below this threshold is considered to be silence
    # ease_in: (in seconds) add this much silence around speaking intervals
    def find_speaking(audio_clip, window_size=0.1, volume_threshold=0.01, ease_in=0.1, audio_fps=44100):
    def find_speaking(audio_clip, window_size=0.1, volume_threshold=0.02, ease_in=0.1, audio_fps=44100):
    # First, iterate over audio to find all silent windows.
    num_windows = math.floor(audio_clip.end/window_size)
    window_is_silent = []
  2. @vivekhaldar vivekhaldar created this gist Aug 30, 2020.
    142 changes: 142 additions & 0 deletions color_edit.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,142 @@
    #!/usr/bin/env python

    import math
    import sys
    from moviepy.editor import AudioClip, VideoFileClip, concatenate_videoclips



    # Get average RGB of part of a frame. Frame is H * W * 3 (rgb)
    # Requires x1 < x2, y1 < y2
    def avg_rgb(frame, x1, y1, x2, y2):
        """Return the average (r, g, b) of a rectangular region of a frame.

        The region is every pixel ``frame[x, y]`` with ``x1 <= x < x2`` and
        ``y1 <= y < y2``. Note that ``x`` indexes the FIRST axis of the frame
        and ``y`` the second, exactly as the frame is subscripted below.

        Args:
            frame: Object indexable as ``frame[x, y, channel]`` where channel
                0, 1, 2 are red, green and blue.
            x1, x2: Half-open range on the first axis.
            y1, y2: Half-open range on the second axis.

        Returns:
            Tuple ``(avg_r, avg_g, avg_b)`` of Python floats.

        Raises:
            ValueError: If the region is empty (x1 >= x2 or y1 >= y2).
        """
        if x2 <= x1 or y2 <= y1:
            raise ValueError(
                "empty region: need x1 < x2 and y1 < y2, got "
                "x=(%r, %r) y=(%r, %r)" % (x1, x2, y1, y2)
            )

        r, g, b = 0, 0, 0
        for x in range(x1, x2):
            for y in range(y1, y2):
                # Convert each sample to a Python int before summing: the
                # samples may be fixed-width scalars (e.g. 8-bit), and summing
                # those directly can wrap around instead of growing.
                r += int(frame[x, y, 0])
                g += int(frame[x, y, 1])
                b += int(frame[x, y, 2])

        total_pixels = (x2 - x1) * (y2 - y1)
        return r / total_pixels, g / total_pixels, b / total_pixels


    # Look for colors in frame, edit based on that.
    # Returns list of [start, end] time intervals (in seconds) we want to keep.
    def color_edit(video):
        """Classify every frame by marker color and derive the intervals to keep.

        A small patch of each frame (rows/cols 100..109) is averaged. A mostly
        red patch marks the frame 'n' (drop the prior interval), a mostly green
        patch marks it 'y' (keep the prior interval), anything else is 'c'
        (content).

        Args:
            video: Clip exposing ``iter_frames()`` and ``fps``.

        Returns:
            List of ``[start, end]`` pairs, in seconds.
        """
        frame_marker = []  # 'c': content; 'y': keep prior interval; 'n': drop prior interval.
        for frame in video.iter_frames():
            avg_r, avg_g, avg_b = avg_rgb(frame, 100, 100, 110, 110)
            is_red = (avg_r > 120) and (avg_g < 50) and (avg_b < 50)
            is_green = (avg_r < 50) and (avg_g > 120) and (avg_b < 50)
            if is_red:
                marker = 'n'
            elif is_green:
                marker = 'y'
            else:
                marker = 'c'
            frame_marker.append(marker)

        return _markers_to_intervals(frame_marker, video.fps)


    def _markers_to_intervals(frame_marker, fps):
        """Turn a per-frame marker list ('c'/'y'/'n') into [start, end] seconds."""
        if not frame_marker:
            return []

        keep_intervals = []
        keep_start = 0
        start_of_last_green = 0
        for i in range(1, len(frame_marker)):
            m1 = frame_marker[i - 1]
            m2 = frame_marker[i]
            if m1 == 'c' and m2 == 'y':
                # Content followed by green: remember where the marker began so
                # the marker frames themselves are not part of the kept interval.
                start_of_last_green = i
            elif m1 == 'y' and m2 == 'c':
                # Green followed by content. Keep previous interval. Start a
                # (possible) new interval.
                keep_end = start_of_last_green / fps
                # Skip zero/negative-length intervals (e.g. video opens on green).
                if keep_end > keep_start:
                    keep_intervals.append([keep_start, keep_end])
                # NOTE(review): this skips the first content frame after green,
                # whereas the red branch below does not - confirm this is intended.
                keep_start = (i + 1) / fps
            elif m1 == 'n' and m2 == 'c':
                # Red followed by content. Drop the previous interval. Start a
                # (possible) new interval.
                keep_start = i / fps

        # Handle the tail: trailing content is kept up to the last frame, a
        # trailing green marker keeps the content that preceded it, and a
        # trailing red marker drops the pending interval.
        last_index = len(frame_marker) - 1
        last_marker = frame_marker[last_index]
        if last_marker == 'c':
            keep_end = last_index / fps
        elif last_marker == 'y':
            keep_end = start_of_last_green / fps
        else:
            return keep_intervals
        if keep_end > keep_start:
            keep_intervals.append([keep_start, keep_end])

        return keep_intervals


    # Iterate over audio to find the non-silent parts. Outputs a list of
    # [speaking_start, speaking_end] intervals.
    def find_speaking(audio_clip, window_size=0.1, volume_threshold=0.01, ease_in=0.1, audio_fps=44100):
        """Return the [start, end] intervals (seconds) where the audio is not silent.

        The clip is cut into consecutive windows; a window whose max volume is
        below ``volume_threshold`` counts as silent. Each run of non-silent
        windows becomes one interval, padded by ``ease_in`` on both sides and
        clamped to ``[0, audio_clip.end]``. Overlapping intervals are merged.
        A run that is still non-silent at the last window is emitted too,
        ending at ``audio_clip.end``.

        Args:
            audio_clip: Clip exposing ``end`` and ``subclip(t0, t1)``; the
                subclip must support ``set_fps(fps).max_volume()``.
            window_size: (in seconds) hunt for silence in windows of this size.
            volume_threshold: volume below this threshold is considered silence.
            ease_in: (in seconds) add this much silence around speaking intervals.
            audio_fps: sample rate applied to each window before measuring it.

        Returns:
            List of ``[start, end]`` pairs in ascending order.

        Raises:
            ValueError: If ``window_size`` is not positive.
        """
        if window_size <= 0:
            raise ValueError("window_size must be positive, got %r" % (window_size,))

        clip_end = audio_clip.end

        # First, iterate over audio to find all silent windows.
        num_windows = math.floor(clip_end / window_size)
        window_is_silent = [
            audio_clip.subclip(i * window_size, (i + 1) * window_size)
            .set_fps(audio_fps)
            .max_volume() < volume_threshold
            for i in range(num_windows)
        ]

        speaking_intervals = []

        def add_interval(start, end):
            # Pad with ease_in, but never reach outside the clip itself.
            padded = [max(0, start - ease_in), min(clip_end, end + ease_in)]
            # With tiny windows the padding can overlap the previous interval,
            # so merge instead of emitting overlapping pieces.
            if speaking_intervals and speaking_intervals[-1][1] > padded[0]:
                speaking_intervals[-1] = [speaking_intervals[-1][0], padded[1]]
            else:
                speaking_intervals.append(padded)

        # Find speaking intervals. Starts at 0 so that audio which opens with
        # speech yields an interval beginning at the start of the clip.
        speaking_start = 0
        for i in range(1, len(window_is_silent)):
            was_silent = window_is_silent[i - 1]
            is_silent = window_is_silent[i]
            if was_silent and not is_silent:
                # silence -> speaking
                speaking_start = i * window_size
            elif not was_silent and is_silent:
                # speaking -> silence, now have a speaking interval
                add_interval(speaking_start, i * window_size)

        # Speech that runs until the end of the clip never sees a
        # speaking -> silence transition, so close that interval here.
        if window_is_silent and not window_is_silent[-1]:
            add_interval(speaking_start, clip_end)

        return speaking_intervals


    def main():
        """Command-line entry point: ``color_edit.py <input-video> <output-video>``.

        Cuts the input video down to the intervals selected by ``color_edit``,
        then removes the silent stretches found by ``find_speaking`` and writes
        the result to the output path. Exits with an error message when the
        arguments are missing or when there is nothing left to write.
        """
        # Parse args
        if len(sys.argv) < 3:
            sys.exit("usage: " + sys.argv[0] + " <input-video> <output-video>")
        # Input file path
        file_in = sys.argv[1]
        # Output file path
        file_out = sys.argv[2]

        vid = VideoFileClip(file_in)
        # Close the source clip even if any editing/encoding step fails.
        try:
            # Color edit.
            intervals_to_keep = color_edit(vid)
            print("Keeping color edit intervals: " + str(intervals_to_keep))
            if not intervals_to_keep:
                # Concatenating an empty clip list would fail with an obscure error.
                sys.exit("No color edit intervals to keep; nothing written.")
            keep_clips = [vid.subclip(start, end) for start, end in intervals_to_keep]
            color_edited_video = concatenate_videoclips(keep_clips)

            # Cut out dead air.
            speaking_intervals = find_speaking(color_edited_video.audio, audio_fps=vid.audio.fps)
            print("Keeping speaking intervals: " + str(speaking_intervals))
            if not speaking_intervals:
                sys.exit("No speaking intervals found; nothing written.")
            speaking_clips = [color_edited_video.subclip(start, end) for start, end in speaking_intervals]
            final_video = concatenate_videoclips(speaking_clips)

            final_video.write_videofile(
                file_out,
                preset='ultrafast',
                codec='libx264',
                temp_audiofile='temp-audio.m4a',
                remove_temp=True,
                audio_codec="aac",
                threads=6,
            )
        finally:
            vid.close()

    # Run the editor only when this file is executed as a script,
    # not when it is imported as a module.
    if __name__ == "__main__":
        main()