Source code for analysis.FFTAnalysis

"""
Module for creating an FFT analysis of audio.

Ref: Code adapted from:
http://www.frank-zalkow.de/en/code-snippets/create-audio-spectrograms-with-python.html?ckattempt=1
"""
from __future__ import print_function, division
import logging
from fileops import pathops
import numpy as np
from numpy.lib import stride_tricks
import os
from AnalysisTools import ButterFilter
from Analysis import Analysis

logger = logging.getLogger(__name__)


class FFTAnalysis(Analysis):
    """
    FFT analysis descriptor class for generation of FFT spectral analysis.

    This descriptor calculates the spectral content for overlapping grains of
    an AnalysedAudioFile object. A full definition of FFT analysis can be
    found in the documentation.

    Arguments:

    - analysis_group: the HDF5 file group to use for the storage of the
      analysis.

    - config: the configuration module used to configure the analysis.
    """

    def __init__(self, AnalysedAudioFile, frames, analysis_group, config=None):
        super(FFTAnalysis, self).__init__(AnalysedAudioFile, frames,
                                          analysis_group, 'FFT')
        self.logger = logging.getLogger(
            __name__ + '.{0}Analysis'.format(self.name))
        # Store reference to the file to be analysed
        self.AnalysedAudioFile = AnalysedAudioFile

        if config:
            window_size = config.fft["window_size"]
        else:
            window_size = 2048

        self.analysis_group = analysis_group
        self.logger.info("Creating FFT analysis for {0}".format(
            self.AnalysedAudioFile.name))
        self.create_analysis(frames, window_size=window_size)
        self.fft_window_count = None

    def create_fft_analysis(self, frames, window_size=512, window_overlap=2,
                            window_type='hanning'):
        """Create a spectral analysis for overlapping frames of audio."""
        if hasattr(frames, '__call__'):
            frames = frames()

        # Calculate the lowest frequency resolvable by the window (the window
        # period expressed as a fraction of the sample rate).
        lowest_freq = 1.0 / window_size
        # Filter out frequencies lower than the period of the window.
        # filter = ButterFilter()
        # filter.design_butter(lowest_freq, self.AnalysedAudioFile.samplerate)
        # frames = filter.filter_butter(frames)

        stft = self.stft(frames, window_size, overlapFac=1 / window_overlap)

        frame_times = self.calc_fft_frame_times(
            stft, frames, self.AnalysedAudioFile.samplerate)

        return (stft, frame_times)

    def get_analysis_grains(self, start, end):
        """
        Retrieve analysis frames for the period specified by start and end
        times.

        Arrays of start and end time pairs will produce an array of
        equivalent size containing frames for these times.
        """
        times = self.analysis_group["FFT"]["times"][:]
        start = start / 1000
        end = end / 1000
        vtimes = times.reshape(-1, 1)

        # Boolean mask of analysis frames falling within each start/end pair.
        selection = np.transpose((vtimes >= start) & (vtimes <= end))

        grain_data = []
        for grain in selection:
            grain_data.append((self.analysis_group["FFT"]["frames"][grain, :],
                               times[grain]))

        return grain_data

    def hdf5_dataset_formatter(self, *args, **kwargs):
        '''
        Formats the output from the analysis method to save to the HDF5 file.

        Places data and attributes in 2 dictionaries to be stored in the HDF5
        file.
        '''
        frames, frame_times = self.create_fft_analysis(*args, **kwargs)
        return (
            {
                'frames': frames,
                'times': frame_times
            },
            {
                'win_size': kwargs.pop('window_size', 512),
                'overlap': kwargs.pop('window_overlap', 2),
                'window_type': kwargs.pop('window_type', 'hanning')
            }
        )

    @staticmethod
    def stft(sig, frameSize, overlapFac=0.5, window=np.hanning):
        """Short time fourier transform of audio signal."""
        win = window(frameSize)
        hopSize = int(frameSize - np.floor(overlapFac * frameSize))

        # Zeros at beginning (thus centre of 1st window should be for
        # sample nr. 0).
        samples = np.append(np.zeros(int(np.floor(frameSize / 2))), sig)
        # Number of columns (frames) needed for windowing.
        cols = int(np.ceil((len(samples) - frameSize) / float(hopSize)) + 1)
        # Zeros at end (thus samples can be fully covered by frames).
        samples = np.append(samples, np.zeros(frameSize))

        frames = stride_tricks.as_strided(
            samples,
            shape=(cols, frameSize),
            strides=(samples.strides[0] * hopSize, samples.strides[0])
        ).copy()
        frames *= win

        return np.fft.rfft(frames)

    '''
    def logscale_spec(self, spec, sr=44100, factor=20.):
        """Scale frequency axis logarithmically."""
        # Get a count of times and frequencies from fft frames
        timebins, freqbins = np.shape(spec)

        # Create array from 0 to 1 with values for each frequency bin.
        # Scale by a power of the factor provided.
        scale = np.linspace(0, 1, freqbins) ** factor
        # Scale to the number of frequency bins
        scale *= (freqbins-1)/max(scale)
        # Round to the nearest whole number and reduce to only unique numbers.
        scale = np.unique(np.round(scale))

        # Create a new complex number array with the number of time frames and
        # the new number of frequency bins
        newspec = np.complex128(np.zeros([timebins, len(scale)]))

        # For each of the frequency bins
        for i in range(0, len(scale)):
            # If it is the highest frequency bin...
            if i == len(scale)-1:
                # Sum all frequency bins from the scale index upwards
                newspec[:, i] = np.sum(spec[:, scale[i]:], axis=1)
            else:
                # Sum all frequency bins from the current scale index up to
                # the next scale index
                newspec[:, i] = np.sum(spec[:, scale[i]:scale[i+1]], axis=1)

        # List the center frequency of bins
        allfreqs = np.abs(np.fft.fftfreq(freqbins*2, 1./sr)[:freqbins+1])
        freqs = []
        # For each of the frequency bins
        for i in range(0, len(scale)):
            if i == len(scale)-1:
                freqs += [np.mean(allfreqs[scale[i]:])]
            else:
                freqs += [np.mean(allfreqs[scale[i]:scale[i+1]])]

        return newspec, freqs

    def plotstft(self, samples, fs, binsize=2**10, plotpath=None,
                 colormap="jet"):
        """Plot spectrogram."""
        # Get all fft frames
        s = self.analysis['data'][:]

        sshow, freq = self.logscale_spec(s, factor=1.0, sr=fs)
        # Amplitude to decibel
        ims = 20.*np.log10(np.abs(sshow)/10e-6)

        # Get the dimensions of the fft
        timebins, freqbins = np.shape(ims)

        plt.figure(figsize=(15, 7.5))
        plt.imshow(np.transpose(ims), origin="lower", aspect="auto",
                   cmap=colormap)
        # Add a colour bar to the side of the spectrogram.
        plt.colorbar()

        # Set spectrogram labels
        plt.xlabel("time (s)")
        plt.ylabel("frequency (hz)")
        plt.xlim([0, timebins-1])
        plt.ylim([0, freqbins])

        # Create an array of 5 values from 0 to the number of times
        xlocs = np.float32(np.linspace(0, timebins-1, 5))
        # Display time values at 5 points along the x axis of the graph
        plt.xticks(xlocs, ["%.02f" % l for l in
                           ((xlocs*len(samples)/timebins)+(0.5*binsize))/fs])
        # Display frequency values at 10 points along the y axis of the graph
        ylocs = np.int16(np.round(np.linspace(0, freqbins-1, 10)))
        plt.yticks(ylocs, ["%.02f" % freq[i] for i in ylocs])

        if plotpath:
            plt.savefig(plotpath, bbox_inches="tight")
        else:
            plt.show()

        plt.clf()
    '''

    def calc_fft_frame_times(self, fftframes, sample_frames, samplerate):
        """Calculate times for frames using sample size and samplerate."""
        if hasattr(sample_frames, '__call__'):
            sample_frames = sample_frames()
        # Get number of frames for time and frequency
        timebins, freqbins = np.shape(fftframes)
        # Create array ranging from 0 to number of time frames
        scale = np.arange(timebins + 1)
        # Divide the number of samples by the total number of frames, then
        # multiply by the frame numbers.
        fft_times = (sample_frames.shape[0] / timebins) * scale[:-1]
        # Divide by the samplerate to give times in seconds
        fft_times = fft_times / samplerate

        return fft_times
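

# ----------------------------------------------------------------------------
# Minimal usage sketch (illustrative only, not part of the original module):
# the stft staticmethod can be exercised on its own with a synthetic signal,
# without the HDF5/AnalysedAudioFile plumbing. The sample rate, window size
# and overlap values below are assumptions chosen for demonstration.
if __name__ == '__main__':
    samplerate = 44100
    window_size = 2048
    window_overlap = 2

    # One second of a 440 Hz sine wave as a stand-in for real audio frames.
    t = np.arange(samplerate) / float(samplerate)
    sig = np.sin(2 * np.pi * 440.0 * t)

    # overlapFac=1/window_overlap mirrors how create_fft_analysis calls stft.
    # With a 2048-sample window and 50% overlap the hop size is 1024 samples.
    spectra = FFTAnalysis.stft(sig, window_size, overlapFac=1 / window_overlap)

    # Each row is the rfft of one window: (frame count, window_size/2 + 1).
    print("STFT shape:", spectra.shape)

    # Frame times follow the same arithmetic as calc_fft_frame_times: evenly
    # spaced frame indices scaled by samples-per-frame, then by the samplerate.
    timebins = spectra.shape[0]
    frame_times = (sig.shape[0] / timebins) * np.arange(timebins) / samplerate
    print("First frame times (s):", frame_times[:5])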