# Source code for analysis.Analysis

from __future__ import print_function
import os
import numpy as np
import logging
import pdb

from fileops import pathops

logger = logging.getLogger(__name__)

[docs]class Analysis(object):
""" Basic descriptor class to build analyses on. The Analysis base class works as an interface between child descriptor objects and the HDF5 storage file. This is designed to seperate descriptor generation from data IO, allowing for quick development of new descriptor classes. The base Analysis class has methods for retreiving analyses from file and saving data created by analysis objects to file. It also has basic formatting methods used to return data in the required format for processed such as descriptor comparisons. In order to create a new descriptor, the hdf5_dataset_formatter method will need to be overwritten by the child class to generate and store the descriptor's output in the appropriate manner. Examples of this can be seen through the currently implemented descriptors. """ def __init__(self, AnalysedAudioFile, frames, analysis_group, name, config=None): # Create object logger self.logger = logging.getLogger(__name__ + '.{0}Analysis'.format(name)) # Store AnalysedAudioFile object to be analysed. self.AnalysedAudioFile = AnalysedAudioFile self.analysis_group = analysis_group self.name = name def create_analysis(self, *args, **kwargs): """ Create the analysis and save to the HDF5 file. analysis_function: The function used to create the analysis. returned data will be stored in the HDF5 file. """ try: self.analysis = self.analysis_group.create_group(self.name) except ValueError: self.logger.info("{0} analysis group already exists".format(self.name)) self.analysis = self.analysis_group[self.name] # If forcing new analysis creation then delete old analysis and create # a new one if self.AnalysedAudioFile.force_analysis: self.logger.info("Force re-analysis is enabled. " "deleting: {0}".format(self.analysis.name)) # Delete all pre-existing data in database. 
for i in self.analysis.iterkeys(): del self.analysis[i] # Run the analysis function and format it's returned data ready to # be saved in the HDF5 file data_dict, attrs_dict = self.hdf5_dataset_formatter(*args, **kwargs) for key, value in data_dict.iteritems(): self.analysis.create_dataset(key, data=value, chunks=True) for key, value in attrs_dict.iteritems(): self.analysis.attrs[key] = value else: if self.analysis.keys(): self.logger.info("Analysis already exists. Reading from: " "{0}".format(self.analysis.name)) else: # If it doesn't then generate a new file # Run the analysis function and format it's returned data ready to # be saved in the HDF5 file data_dict, attrs_dict = self.hdf5_dataset_formatter(*args, **kwargs) for key, value in data_dict.iteritems(): self.analysis.create_dataset(key, data=value, chunks=True) for key, value in attrs_dict.iteritems(): self.analysis.attrs[key] = value def get_analysis_grains(self, start, end): """ Retrieve analysis frames for period specified in start and end times. arrays of start and end time pairs will produce an array of equivelant size containing frames for these times. """ times = self.analysis_group[self.name]["times"][:] start = start / 1000 end = end / 1000 vtimes = times.reshape(-1, 1) selection = np.transpose((vtimes >= start) & (vtimes <= end)) # If there are no frames for this grain, take the two closest frames # from the adjacent grains. if not selection.any(): frame_center = start + (end-start)/2. closest_frames = np.abs(vtimes-frame_center).argsort()[:2] selection[closest_frames] = True #start_ind = np.min(selection) #end_ind = np.argmax(selection) frames = self.analysis_group[self.name]["frames"][:] grain_data = (frames, selection) return grain_data def hdf5_dataset_formatter(analysis_method, *args, **kwargs): ''' Note: This is a generic formatter designed as a template to be overwritten by a descriptor sub-class. Formats the output from the analysis method to save to the HDF5 file. 
Places data and attributes in 2 dictionaries to be stored in the HDF5 file. ''' output, attributes = analysis_method(*args, **kwargs) return ({'data': output}, {'attrs': attributes}) ################################################################################ # Formatting functions ################################################################################ def log2_median(self, x): return np.median(1000 * np.log2(1+x/1000)) def log2_mean(self, x): return np.mean(1000 * np.log2(1+x/1000)) def formatter_func(self, selection, frames, valid_inds, formatter=None): # get all valid frames from current grain frames = frames[selection & valid_inds] return formatter(frames) #if less than half the frames are valid then the grain is not valid. if frames.size < valid_inds[selection].nonzero()[0].size/2: return np.nan def analysis_formatter(self, frames, selection, format): """Calculate the average analysis value of the grain using the match format specified.""" valid_inds = np.isfinite(frames) format_style_dict = { 'mean': np.mean, 'median': np.median, 'log2_mean': self.log2_mean, 'log2_median': self.log2_median, } output = np.empty(len(selection)) if not selection.size: # TODO: Add warning here return np.nan # For debugging apply_along_axis: #for ind, i in enumerate(selection): # output[ind] = self.formatter_func(i, frames, valid_inds, formatter=format_style_dict[format]) output = np.apply_along_axis(self.formatter_func, 1, selection, frames, valid_inds, formatter=format_style_dict[format]) return output