#!/usr/bin/env python
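"""Automatically edit a screen recording using colored marker frames and silence.

Sections of the video that end with a green marker frame are kept, sections
that end with a red marker frame are dropped (the marker color is detected
from a small pixel patch around (100, 100)), and silent stretches are then
cut from whatever remains.

Usage: python <this script> <input video path> <output video path>
"""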
import math
import sys
from moviepy.editor import VideoFileClip, concatenate_videoclips


# Get the average RGB of a rectangular region of a frame. `frame` is an
# H x W x 3 (RGB) array; note that x indexes rows and y indexes columns here.
# Assumes x1 < x2, y1 < y2.
def avg_rgb(frame, x1, y1, x2, y2):
    r, g, b = 0, 0, 0
    for x in range(x1, x2):
        for y in range(y1, y2):
            # Cast to Python int so the sums don't wrap around in uint8 arithmetic.
            r += int(frame[x, y, 0])
            g += int(frame[x, y, 1])
            b += int(frame[x, y, 2])
    total_pixels = (x2 - x1) * (y2 - y1)
    avg_r = r / total_pixels
    avg_g = g / total_pixels
    avg_b = b / total_pixels
    #print(avg_r, avg_g, avg_b)
    return avg_r, avg_g, avg_b
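
# A vectorized equivalent (assuming `frame` is a NumPy array, which is what
# iter_frames() yields) would be:
#   avg_r, avg_g, avg_b = frame[x1:x2, y1:y2].mean(axis=(0, 1))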
# Look for red/green marker colors in each frame and edit based on that.
# Returns a list of [start, end] time intervals (in seconds) we want to keep.
def color_edit(video):
    frame_marker = []  # 'c': content; 'y': keep prior interval; 'n': drop prior interval
    # Classify every frame by sampling a small patch of pixels.
    for frame in video.iter_frames():
        avg_r, avg_g, avg_b = avg_rgb(frame, 100, 100, 110, 110)
        is_red = (avg_r > 120) and (avg_g < 50) and (avg_b < 50)
        is_green = (avg_r < 50) and (avg_g > 120) and (avg_b < 50)
        marker = 'c'
        if is_red:
            marker = 'n'
        elif is_green:
            marker = 'y'
        frame_marker.append(marker)
    #print(frame_marker)
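
    # Rough illustration (assuming video.fps == 1, so frame index == seconds):
    # markers ['c','c','y','y','c','c','n','n','c','c'] give keep intervals
    # [0, 4] and [8, 9]: the green run closes the first kept interval, the red
    # run discards the footage before it, and the trailing content is kept.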
    keep_start, keep_end = 0, 0
    keep_intervals = []
    for i in range(1, len(frame_marker)):
        m1 = frame_marker[i - 1]
        m2 = frame_marker[i]
        # Green followed by content: keep the previous interval and start a (possible) new one.
        if m1 == 'y' and m2 == 'c':
            keep_end = i / video.fps
            keep_intervals.append([keep_start, keep_end])
            keep_start = (i + 1) / video.fps
        # Red followed by content: drop the previous interval and start a (possible) new one.
        if m1 == 'n' and m2 == 'c':
            keep_start = i / video.fps

    # Video ends on content (or on green with no content after it): keep the final interval.
    last_index = len(frame_marker) - 1
    if frame_marker[last_index] == 'c' or frame_marker[last_index] == 'y':
        keep_end = last_index / video.fps
        keep_intervals.append([keep_start, keep_end])

    return keep_intervals
# Iterate over the audio to find the non-silent parts. Returns a list of
# [speaking_start, speaking_end] intervals (in seconds).
# Args:
#  window_size: (in seconds) hunt for silence in windows of this size
#  volume_threshold: volume below this threshold is considered to be silence
#  ease_in: (in seconds) keep this much extra padding around speaking intervals
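#
# For example, with the defaults a 60-second clip is scanned as roughly 600
# windows of 0.1 s each, and a stretch of speech from 10.0 s to 12.0 s comes
# back as roughly the interval [9.9, 12.1] once the ease_in padding is applied.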
def find_speaking(audio_clip, window_size=0.1, volume_threshold=0.01, ease_in=0.1, audio_fps=44100):
    # First, iterate over the audio to find all silent windows.
    num_windows = math.floor(audio_clip.end / window_size)
    window_is_silent = []
    for i in range(num_windows):
        s = audio_clip.subclip(i * window_size, (i + 1) * window_size).set_fps(audio_fps)
        v = s.max_volume()
        window_is_silent.append(v < volume_threshold)

    # Find speaking intervals.
    speaking_start = 0
    speaking_end = 0
    speaking_intervals = []
    for i in range(1, len(window_is_silent)):
        e1 = window_is_silent[i - 1]
        e2 = window_is_silent[i]
        # Transition: silence -> speaking.
        if e1 and not e2:
            speaking_start = i * window_size
        # Transition: speaking -> silence; we now have a speaking interval.
        if not e1 and e2:
            speaking_end = i * window_size
            new_speaking_interval = [max(0, speaking_start - ease_in), speaking_end + ease_in]
            # With tiny windows, this can sometimes overlap the previous interval, so merge.
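            # For example, with the defaults, speech that stops at 2.0 s yields an
            # interval ending around 2.1 s, and speech that resumes at 2.1 s yields
            # one starting around 2.0 s, so the two are merged into a single interval.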
            need_to_merge = len(speaking_intervals) > 0 and speaking_intervals[-1][1] > new_speaking_interval[0]
            if need_to_merge:
                merged_interval = [speaking_intervals[-1][0], new_speaking_interval[1]]
                speaking_intervals[-1] = merged_interval
            else:
                speaking_intervals.append(new_speaking_interval)

    return speaking_intervals
def main():
    # Parse args: input file path and output file path.
    if len(sys.argv) != 3:
        print("Usage: " + sys.argv[0] + " <input_video> <output_video>")
        sys.exit(1)
    file_in = sys.argv[1]
    file_out = sys.argv[2]

    vid = VideoFileClip(file_in)

    # Color edit: keep/drop intervals based on the marker frames.
    intervals_to_keep = color_edit(vid)
    print("Keeping color edit intervals: " + str(intervals_to_keep))
    keep_clips = [vid.subclip(start, end) for [start, end] in intervals_to_keep]
    color_edited_video = concatenate_videoclips(keep_clips)

    # Cut out dead air.
    speaking_intervals = find_speaking(color_edited_video.audio, audio_fps=vid.audio.fps)
    print("Keeping speaking intervals: " + str(speaking_intervals))
    speaking_clips = [color_edited_video.subclip(start, end) for [start, end] in speaking_intervals]
    final_video = concatenate_videoclips(speaking_clips)
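
    # Encode with x264/AAC. The 'ultrafast' preset favors encoding speed over
    # file size; moviepy writes the audio to a temporary file, muxes it in, and
    # deletes it afterwards because remove_temp=True. threads=6 assumes the
    # machine has that many cores to spare.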
    final_video.write_videofile(file_out,
                                #fps=60,
                                preset='ultrafast',
                                codec='libx264',
                                temp_audiofile='temp-audio.m4a',
                                remove_temp=True,
                                audio_codec="aac",
                                threads=6
                                )
    vid.close()


if __name__ == '__main__':
    main()