import tensorflow as tf
# FIXME: audio_ops.decode_wav is deprecated, use
# tensorflow_io.IOTensor.from_audio
from tensorflow.contrib.framework.python.ops import audio_ops

# Enable eager execution for a more interactive frontend.
# If using the default graph mode, you'll probably need to run in a session.
tf.enable_eager_execution()


@tf.function
def audio_to_mfccs(audio_contents, channels=1, sample_rate=8000):
    """Decode WAV data and return its MFCCs.

    Pipeline: decode_wav -> STFT magnitude -> mel filterbank -> stabilized
    log -> MFCCs.

    Args:
        audio_contents: Encoded WAV data handed to ``audio_ops.decode_wav``
            (the script below passes the result of ``tf.io.read_file``).
        channels: Forwarded to ``decode_wav`` as ``desired_channels``.
        sample_rate: Sample rate in Hz used to build the mel filterbank; it
            also sets the upper mel edge to ``sample_rate / 2``. The rate
            stored in the WAV data itself is not consulted here.

    Returns:
        The tensor produced by ``tf.signal.mfccs_from_log_mel_spectrograms``
        for the log-mel spectrograms of the decoded audio.
    """
    # STFT framing parameters.
    frame_length = 1024
    frame_step = 256
    fft_length = 1024

    # Mel filterbank parameters.
    mel_bin_count = 128
    mel_low_hz = 80
    mel_high_hz = sample_rate / 2

    decoded = audio_ops.decode_wav(audio_contents, desired_channels=channels)

    # FIXME: Maybe tf.transpose is not needed in tf 2.x.
    samples = tf.transpose(decoded.audio)
    stfts = tf.contrib.signal.stft(
        samples,
        frame_length=frame_length,
        frame_step=frame_step,
        fft_length=fft_length,
    )
    magnitudes = tf.abs(stfts)

    # Warp the linear-scale spectrograms onto the mel scale.
    spectrogram_bin_count = stfts.shape[-1].value
    mel_weights = tf.signal.linear_to_mel_weight_matrix(
        num_mel_bins=mel_bin_count,
        num_spectrogram_bins=spectrogram_bin_count,
        sample_rate=sample_rate,
        lower_edge_hertz=mel_low_hz,
        upper_edge_hertz=mel_high_hz,
    )
    mel = tf.tensordot(magnitudes, mel_weights, 1)
    # Re-attach the static shape: leading dims of the spectrogram followed by
    # the last dim of the weight matrix.
    static_shape = magnitudes.shape[:-1].concatenate(mel_weights.shape[-1:])
    mel.set_shape(static_shape)

    # A small offset keeps the log finite where the mel energy is zero.
    log_mel = tf.math.log(mel + 1e-6)

    return tf.signal.mfccs_from_log_mel_spectrograms(log_mel)


if __name__ == '__main__':
    wav_path = tf.constant('test.wav')

    # Read the raw file contents and compute the MFCCs.
    wav_bytes = tf.io.read_file(wav_path)
    coefficients = audio_to_mfccs(wav_bytes)

    # Show only the first 20 coefficients along the last axis.
    print(coefficients[..., :20])