Commit f7c3f3a1 authored by larsvanarragon

final version for comp 2

parent 950e78be
tqdm
numpy
tensorflow-gpu==1.11
pandas
lightgbm
scikit-learn
matplotlib
keras
librosa
seaborn
PyWavelets
xgboost
catboost
from keras import backend as K
from keras.optimizers import Optimizer
class AdaBound(Optimizer):
"""AdaBound optimizer.
Default parameters follow those provided in the original paper.
# Arguments
lr: float >= 0. Learning rate.
final_lr: float >= 0. Final learning rate.
beta_1: float, 0 < beta < 1. Generally close to 1.
beta_2: float, 0 < beta < 1. Generally close to 1.
gamma: float >= 0. Convergence speed of the bound function.
epsilon: float >= 0. Fuzz factor. If `None`, defaults to `K.epsilon()`.
decay: float >= 0. Learning rate decay over each update.
weight_decay: Weight decay weight.
amsbound: boolean. Whether to apply the AMSBound variant of this
algorithm.
# References
- [Adaptive Gradient Methods with Dynamic Bound of Learning Rate]
(https://openreview.net/forum?id=Bkg3g2R9FX)
- [Adam - A Method for Stochastic Optimization]
(https://arxiv.org/abs/1412.6980v8)
- [On the Convergence of Adam and Beyond]
(https://openreview.net/forum?id=ryQu7f-RZ)
"""
def __init__(self, lr=0.001, final_lr=0.1, beta_1=0.9, beta_2=0.999, gamma=1e-3,
epsilon=None, decay=0., amsbound=False, weight_decay=0.0, **kwargs):
super(AdaBound, self).__init__(**kwargs)
if not 0. <= gamma <= 1.:
raise ValueError("Invalid `gamma` parameter. Must lie in [0, 1] range.")
with K.name_scope(self.__class__.__name__):
self.iterations = K.variable(0, dtype='int64', name='iterations')
self.lr = K.variable(lr, name='lr')
self.beta_1 = K.variable(beta_1, name='beta_1')
self.beta_2 = K.variable(beta_2, name='beta_2')
self.decay = K.variable(decay, name='decay')
self.final_lr = final_lr
self.gamma = gamma
if epsilon is None:
epsilon = K.epsilon()
self.epsilon = epsilon
self.initial_decay = decay
self.amsbound = amsbound
self.weight_decay = float(weight_decay)
self.base_lr = float(lr)
def get_updates(self, loss, params):
grads = self.get_gradients(loss, params)
self.updates = [K.update_add(self.iterations, 1)]
lr = self.lr
if self.initial_decay > 0:
lr = lr * (1. / (1. + self.decay * K.cast(self.iterations,
K.dtype(self.decay))))
t = K.cast(self.iterations, K.floatx()) + 1
# Bias-corrected step size, as in Adam
step_size = lr * (K.sqrt(1. - K.pow(self.beta_2, t)) /
(1. - K.pow(self.beta_1, t)))
# Dynamic lower/upper bounds on the learning rate, both converging towards final_lr
final_lr = self.final_lr * lr / self.base_lr
lower_bound = final_lr * (1. - 1. / (self.gamma * t + 1.))
upper_bound = final_lr * (1. + 1. / (self.gamma * t))
ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
if self.amsbound:
vhats = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
else:
vhats = [K.zeros(1) for _ in params]
self.weights = [self.iterations] + ms + vs + vhats
for p, g, m, v, vhat in zip(params, grads, ms, vs, vhats):
# apply weight decay
if self.weight_decay != 0.:
g += self.weight_decay * K.stop_gradient(p)
m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)
if self.amsbound:
vhat_t = K.maximum(vhat, v_t)
denom = (K.sqrt(vhat_t) + self.epsilon)
self.updates.append(K.update(vhat, vhat_t))
else:
denom = (K.sqrt(v_t) + self.epsilon)
# Compute the bounds
step_size_p = step_size * K.ones_like(denom)
step_size_p_bound = step_size_p / denom
bounded_lr_t = m_t * K.minimum(K.maximum(step_size_p_bound,
lower_bound), upper_bound)
p_t = p - bounded_lr_t
self.updates.append(K.update(m, m_t))
self.updates.append(K.update(v, v_t))
new_p = p_t
# Apply constraints.
if getattr(p, 'constraint', None) is not None:
new_p = p.constraint(new_p)
self.updates.append(K.update(p, new_p))
return self.updates
def get_config(self):
config = {'lr': float(K.get_value(self.lr)),
'final_lr': float(self.final_lr),
'beta_1': float(K.get_value(self.beta_1)),
'beta_2': float(K.get_value(self.beta_2)),
'gamma': float(self.gamma),
'decay': float(K.get_value(self.decay)),
'epsilon': self.epsilon,
'weight_decay': self.weight_decay,
'amsbound': self.amsbound}
base_config = super(AdaBound, self).get_config()
return dict(list(base_config.items()) + list(config.items()))
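# Minimal usage sketch (illustrative only; the model architecture and data names below
# are placeholder assumptions, not part of this repository): the optimizer plugs into
# the standard Keras compile/fit workflow like any built-in optimizer.
#
#     from keras.models import Sequential
#     from keras.layers import Dense
#
#     model = Sequential([Dense(64, activation='relu', input_shape=(n_feats,)),
#                         Dense(1)])
#     model.compile(optimizer=AdaBound(lr=1e-3, final_lr=0.1, amsbound=False),
#                   loss='mean_absolute_error')
#     model.fit(X_train, y_train, epochs=10, batch_size=32)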
from threading import Thread
import pandas as pd
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import train_test_split
from compare_test_train_features import find_representive_features
from models import NuSVRModel
class Context:
def __init__(self, X_train, X_test, y_train, y_test, X_train_temp, X_test_temp, feats, feat_scores):
self.X_train = X_train.copy()
self.X_test = X_test.copy()
self.y_train = y_train.copy().values.ravel()
self.y_test = y_test.copy().values.ravel()
self.X_train_temp = X_train_temp.copy()
self.X_test_temp = X_test_temp.copy()
self.feats = feats
self.feat_scores = feat_scores
def load_data(threshold=0.35, filter_bad_feats=True):
X = pd.read_pickle("../temp/X.pkl")
y = pd.read_pickle("../temp/y.pkl")
if filter_bad_feats:
good_hombres, bad_hombres = find_representive_features(threshold=threshold)
X = X[good_hombres.index]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
return X_train, X_test, y_train, y_test
def do_feats(context: Context):
for feat in context.feats:
# Add the candidate column to this thread's own copies
context.X_train_temp[feat] = context.X_train[feat].values
context.X_test_temp[feat] = context.X_test[feat].values
# Train model
model = NuSVRModel(n_feats=context.X_train_temp.shape[1])
model.fit(context.X_train_temp, context.y_train, None, None)
# Check performance
y_pred = model.predict(context.X_test_temp)
mae = mean_absolute_error(context.y_test, y_pred.ravel())
context.feat_scores[feat] = mae
# Reset features
context.X_train_temp.drop(feat, axis=1, inplace=True)
context.X_test_temp.drop(feat, axis=1, inplace=True)
def partition(lst, n):
'''
Split the list into n roughly equal parts, round-robin: element i goes to part i % n.
'''
return [lst[i::n] for i in range(n)]
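# Example (illustrative): partition([1, 2, 3, 4, 5], 2) returns [[1, 3, 5], [2, 4]],
# so each worker thread gets roughly len(lst) / n features to score.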
def calculate_feature_scores(X_train, X_test, y_train, y_test, n_threads):
file = open('../temp/feature_scores.py', 'w')
file.write("[")
# Get baseline score
model = NuSVRModel(n_feats=X_train.shape[1])
model.fit(X_train, y_train.values.ravel(), None, None)
y_pred = model.predict(X_test)
mae = mean_absolute_error(y_test.values.squeeze().ravel(), y_pred.ravel())
# Iterate over features and rank them
X_train_temp = pd.DataFrame(index=X_train.index)
X_test_temp = pd.DataFrame(index=X_test.index)
feats = list(X_train)
current = ("baseline with all features", mae)
while len(feats) > 0:
print(f"{len(feats)} features left ({str(current)})")
feat_scores = {}
threads = []
feats_splits = partition(feats, n_threads)
for feats_split in feats_splits:
context = Context(X_train,
X_test,
y_train,
y_test,
X_train_temp,
X_test_temp,
feats_split,
feat_scores)
thread = Thread(target=do_feats, args=(context,))
thread.start()
threads.append(thread)
for thread in threads:
thread.join()
# Find best feature
ordered_feats = sorted(feat_scores.items(), key=lambda x: x[1])
current = ordered_feats[0]
best_feature = current[0]
# Log result
file.write(f" {str(ordered_feats[0])}, \n")
file.flush()
# Keep best features
X_train_temp[best_feature] = X_train[best_feature].values
X_test_temp[best_feature] = X_test[best_feature].values
# Drop from original
feats.remove(best_feature)
file.write(f" ] \n")
file.close()
return X_train_temp
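# Note on the loop above: it performs greedy forward selection. At every iteration each
# remaining feature is scored (across n_threads worker threads) by the test-set MAE it
# yields when added to the already-selected set; the feature with the lowest MAE is
# appended, and the (feature, MAE) pair is written to ../temp/feature_scores.py as one
# entry of a Python list literal, yielding a full greedy ranking of all features.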
if __name__ == "__main__":
X_train, X_test, y_train, y_test = load_data(threshold=0.35, filter_bad_feats=True)
calculate_feature_scores(X_train, X_test, y_train, y_test, n_threads=32)
import glob
import re
import numpy as np
import pandas as pd
from sklearn.svm import NuSVR
from main import save_predictions
def load_data():
x_train_files = glob.glob("../temp/pred_valid*")
x_test_files = glob.glob("../temp/submission*")
y_train = pd.read_pickle("../temp/y.pkl")
x_train = []
for file in x_train_files:
x = pd.read_pickle(file)
x_train.append(x.reset_index(drop=True))
x_train = pd.concat(x_train, axis=1)
x_test = []
ids = None
for file in x_test_files:
match = re.search(r"submission_(.*)_\d+\.csv", file)
name = match.group(1)
x = pd.read_csv(file, delimiter=',')
ids = x['seg_id']
x = x.drop(['seg_id'], axis=1)
x.rename(columns={list(x)[0]: name}, inplace=True)
x_test.append(x.reset_index(drop=True))
x_test = pd.concat(x_test, axis=1)
return x_train, y_train, x_test, ids
if __name__ == "__main__":
meme_factor = 1.1
x_train, y_train, x_test, ids = load_data()
model = NuSVR()
model.fit(x_train, y_train.values.ravel())
pred_train = model.predict(x_train)
mae = np.mean(np.abs(y_train.values.squeeze() - pred_train.squeeze()))
print(f"\nError: {mae} \n")
pred_test = model.predict(x_test) * meme_factor
predictions = dict(zip(ids, pred_test))
save_predictions(predictions, "submission")
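# Note: this script stacks the base models. It fits a NuSVR meta-model on the saved
# validation predictions (the pred_valid* pickles, presumably one per base model),
# applies it to the per-model submission CSVs, and scales the blended test predictions
# by `meme_factor` before writing the final submission.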
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
def plot_mean_abs_diff(df):
fig, ax = plt.subplots()
ax.plot(range(len(df.mean_abs_diff.values)), df.mean_abs_diff.values)
ax.set(xlabel='Features', ylabel='Absolute difference', title='Mean absolute difference between features in train and test set.')
ax.set_xticklabels([])
ax.grid()
fig.savefig("../temp/plt_train_test_differences.png")
plt.show()
def find_representive_features(threshold=None):
# Load dataframes
X_train: pd.DataFrame = pd.read_pickle("../temp/X.pkl")
X_test: pd.DataFrame = pd.read_pickle("../temp/X_test.pkl")
assert set(X_train.columns) == set(X_test.columns)
features = X_train.columns
# Normalize data
scaler = StandardScaler()
scaler.fit(X_train)
X_train.loc[:, :] = scaler.transform(X_train)
X_test.loc[:, :] = scaler.transform(X_test)
# Calculate differences
df = pd.DataFrame(index=features)
for feature in features:
feature_train = X_train[feature]
feature_test = X_test[feature]
df.at[feature, 'mean_train'] = np.mean(feature_train)
df.at[feature, 'mean_test'] = np.mean(feature_test)
df.at[feature, 'mean_diff'] = np.mean(feature_train) - np.mean(feature_test)
df.at[feature, 'mean_abs_diff'] = np.abs(np.mean(feature_train) - np.mean(feature_test))
# Serialize dataframe
df = df.sort_values('mean_abs_diff')
df.to_pickle("../temp/feature_differences.pkl")
# Filter
if threshold is None:
threshold = np.mean(df.mean_abs_diff)
bad_hombres = df[df.mean_abs_diff > threshold]
good_hombres = df[df.mean_abs_diff <= threshold]
# Sort
bad_hombres = bad_hombres.sort_values('mean_abs_diff', ascending=False)
good_hombres = good_hombres.sort_values('mean_abs_diff')
# Save
bad_hombres.to_pickle("../temp/features_bad.pkl")
good_hombres.to_pickle("../temp/features_good.pkl")
return good_hombres, bad_hombres
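# Note: after standardizing both sets with a scaler fit on the training data, a feature
# is kept ("good hombre") when the absolute difference between its train and test means
# is at most `threshold` (in standardized units); a larger shift marks it as a feature
# whose train distribution does not represent the test set ("bad hombre").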
if __name__ == "__main__":
good_hombres, bad_hombres = find_representive_features(0.35)
print("Good hombres:")
# print(good_hombres.head(12))
print("Bad hombres:")
print(bad_hombres)
df = pd.read_pickle("../temp/feature_differences.pkl")
plot_mean_abs_diff(df)
[ ('raw_mfcc_4_mean', 2.1631470687052641),
('lowpass_mfcc_9_mean', 2.1297781390539701),
('lowpass_mfcc_4_std', 2.1031042210112818),
('raw_mfcc_13_mean', 2.0915637939049705),
('denoised_mfcc_12_mean', 2.0751307616699162),
('denoised_lowpass_amplitude_envelope_mean', 2.0617562942009298),
('raw_mfcc_10_std', 2.0530657866201278),
('denoised_lowpass_mfcc_17_std', 2.0483216529703667),
('denoised_lowpass_mfcc_18_mean', 2.0448773893903032),
('lowpass_spectrum_skew', 2.0404829923457291),
('denoised_lowpass_mfcc_9_mean', 2.0372912240067684),
('denoised_lowpass_mfcc_12_std', 2.0344837232965491),
('denoised_mfcc_8_mean', 2.0323943069068173),
('raw_mfcc_18_mean', 2.0305869154525111),
('raw_mfcc_11_mean', 2.0289850681077577),
('denoised_lowpass_mfcc_6_std', 2.0240048683544836),
('raw_mfcc_5_mean', 2.0208732792244213),
('raw_mfcc_6_mean', 2.0173416585274326),
('denoised_lowpass_mfcc_3_std', 2.0138461638015337),
('denoised_mfcc_5_mean', 2.0122907497977272),
('amplitude_envelope_17', 2.0085279355516552),
('raw_base_skew', 2.0018315307839663),
('raw_amplitude_envelope_mean', 1.9994526602554825),
('denoised_lowpass_spectrum_skew', 1.9985719429340834),
('denoised_amplitude_envelope_mean', 1.9975155471752422),
('raw_mfcc_18_std', 1.9969403228438407),
('denoised_base_skew', 1.9960915212376429),
('lowpass_amplitude_envelope_mean', 1.9958861529491345),
('denoised_lowpass_mfcc_18_std', 1.9958433553710446),
('lowpass_rmse_mean', 1.996260343115225),
('lowpass_mfcc_18_mean', 1.9967297936955977),
('denoised_amplitude_envelope_q99', 1.9967974457935758),
('denoised_mfcc_0_trend', 1.9961979919133064),
('denoised_lowpass_rmse_mean', 1.9962392478723889),
('lowpass_base_q995', 1.9962212523805145),
('denoised_lowpass_mfcc_10_std', 1.9951978551796734),
('lowpass_mfcc_17_std', 1.9948612124671228),
('lowpass_mfcc_18_std', 1.9949622281865427),
('denoised_amplitude_envelope_q1', 1.9952853920800588),
('denoised_amplitude_envelope_q2', 1.9958728902506182),
('denoised_lowpass_base_std', 1.9962303725121728),
('lowpass_mfcc_6_mean', 1.9961506907597182),
('denoised_mfcc_18_std', 1.9953209958158529),
('denoised_mfcc_2_trend', 1.9954989094931865),
('denoised_mfcc_5_trend', 1.9958923328531002),
('raw_rmse_mean', 1.9964497816196451),
('denoised_lowpass_mfcc_0_trend', 1.9970440166245287),
('denoised_lowpass_mfcc_6_mean', 1.997768880509091),
('denoised_rmse_mean', 1.9982115523720643),
('lowpass_mfcc_0_trend', 1.998746383640474),
('denoised_mfcc_4_trend', 1.9993770344490263),
('denoised_amplitude_envelope_q995', 2.0000877502843912),
('denoised_mfcc_15_std', 2.0001726016332353),
('denoised_lowpass_mfcc_13_std', 2.0002065012626846),
('denoised_mfcc_14_std', 1.9999401540310593),
('lowpass_mfcc_17_mean', 1.9999791880085871),
('lowpass_base_skew', 1.9988962134233117),
('raw_mfcc_8_mean', 1.9988585818477915),
('lowpass_mfcc_13_std', 1.9988389569530531),
('lowpass_mfcc_10_std', 1.9984219155143701),
('raw_mfcc_19_mean', 1.9988080693702452),
('amplitude_envelope_27', 1.998477079182124),
('denoised_mfcc_11_std', 1.9971707892753245),
('raw_mfcc_12_std', 1.997033860234803),
('denoised_lowpass_mfcc_17_mean', 1.9969534847695376),
('lowpass_mfcc_9_std', 1.9967133674269437),
('raw_mfcc_13_std', 1.9964729223249231),
('denoised_mfcc_9_mean', 1.9965485596541426),
('lowpass_mfcc_11_std', 1.9961731228637414),
('lowpass_mfcc_12_std', 1.9961201661381995),
('lowpass_mfcc_19_mean', 1.9960854066771552),
('denoised_chroma_stft_2_mean', 1.9962156298913076),
('denoised_chroma_stft_5_std', 1.9964344891983805),
('denoised_lowpass_mfcc_12_trend', 1.9966924010825591),
('denoised_mfcc_10_trend', 1.9970428660102735),
('denoised_chroma_stft_3_std', 1.9973880976710618),
('denoised_lowpass_base_skew', 1.9977202045751046),
('denoised_mfcc_7_trend', 1.9979963095392785),
('denoised_lowpass_mfcc_11_std', 1.9983189552476748),
('lowpass_mfcc_19_trend', 1.998686919189028),
('raw_mfcc_6_trend', 1.9990414225559281),
('denoised_lowpass_mfcc_16_trend', 1.9994251614277858),
('denoised_mfcc_18_trend', 1.9998613833087011),
('raw_chroma_stft_6_std', 2.0002397266778389),
('raw_mfcc_1_std', 2.0006515902223851),
('denoised_lowpass_mfcc_8_mean', 2.0008765563520083),
('lowpass_mfcc_1_std', 2.0010491392882894),
('raw_mfcc_8_std', 2.0008614414037886),
('lowpass_mfcc_8_mean', 2.0006332848516624),
('raw_mfcc_14_std', 2.0006596545900912),
('raw_mfcc_3_trend', 2.0008300034162345),
('denoised_lowpass_mfcc_1_std', 2.0010517868004878),
('denoised_mfcc_3_trend', 2.001257923354451),
('denoised_lowpass_mfcc_9_trend', 2.0016120044489765),
('denoised_mfcc_12_std', 2.0020553119996416),
('lowpass_chroma_stft_1_std', 2.0023496028653551),
('raw_chroma_stft_8_std', 2.0027503588427229),
('denoised_amplitude_envelope_q9', 2.0031247338719167),
('lowpass_mfcc_3_std', 2.0034237052028394),
('raw_mfcc_15_mean', 2.0036339592001902),
('lowpass_mfcc_9_trend', 2.0038809224883618),
('raw_rmse_trend', 2.0041734490069594),
('raw_chroma_stft_9_std', 2.004497917587849),
('raw_chroma_stft_11_mean', 2.004839897685168),
('denoised_amplitude_envelope_q95', 2.0051856743292427),
('denoised_mfcc_12_trend', 2.0054530282387062),
('denoised_spectrum_std', 2.0057594347899306),
('denoised_amplitude_envelope_q8', 2.0060704125537971),
('denoised_lowpass_spectrum_std', 2.0063926783744863),
('raw_chroma_stft_4_std', 2.0067616612882926),
('lowpass_mfcc_5_trend', 2.0071139126368998),
('lowpass_mfcc_6_trend', 2.0074645101109967),
('denoised_zero_crossing_std', 2.0078333982301935),
('denoised_lowpass_chroma_stft_3_std', 2.0082251785190746),
('denoised_lowpass_mfcc_6_trend', 2.0086159170374076),
('denoised_mfcc_0_std', 2.0089469110472624),
('amplitude_envelope_20', 2.0050872266422104),
('raw_mfcc_2_mean', 2.0018228031459753),
('lowpass_mfcc_0_mean', 1.9959007667897612),
('amplitude_envelope_6', 1.9917478706209941),
('denoised_spectrum_skew', 1.9897934715189387),
('lowpass_mfcc_14_mean', 1.9884806426546928),
('denoised_mfcc_9_std', 1.9873278674191908),
('lowpass_mfcc_13_mean', 1.9862824594682478),
('lowpass_mfcc_2_mean', 1.9849210198499301),
('raw_spectrum_skew', 1.9844000541683211),
('denoised_lowpass_mfcc_13_mean', 1.9837867286837634)]
import librosa
import numpy as np
import pywt
import scipy.signal as sp
from scipy.signal import find_peaks, detrend, savgol_filter
from scipy.stats import skew
from compare_test_train_features import find_representive_features
from select_features import load_feature_scores, get_best_features
SAMPLE_RATE = 4_000_000
LOWPASS_FILTER_CUTOFF = 140_000
PEAK_THRESHOLD = 0.001
PEAK_SMOOTHING_FACTOR = 401
N_MFCC = 20
def apply_fourier_transform(x):
N = len(x)
x = detrend(x)
spectrum = np.fft.fft(x)[:N // 2] / N
spectrum[1:] = 2 * spectrum[1:]
return np.abs(spectrum)
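# Illustrative note (not used elsewhere in this module): the returned single-sided
# amplitude spectrum has N // 2 bins; the matching frequency axis can be obtained with
#     freqs = np.fft.fftfreq(N, d=1.0 / SAMPLE_RATE)[:N // 2]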
def smooth_signal(y, box_pts):
box = np.ones(box_pts) / box_pts
y_smooth = np.convolve(y, box, mode='same')
return y_smooth
def get_peaks(spectrum, n_peaks=32, smoothing=PEAK_SMOOTHING_FACTOR, prominence=PEAK_THRESHOLD):
dom_freq = np.argmax(spectrum)
# Find peaks in the (raw) spectrum; smoothing is currently disabled
# if smoothing:
#     spectrum = savgol_filter(spectrum, PEAK_SMOOTHING_FACTOR, 3)
#     spectrum_smooth = smooth_signal(abs(spectrum), 801)
peaks, props = find_peaks(spectrum, prominence=prominence)
num = len(peaks)
# Create sorted features of the peaks and their prominences
f_peaks = [-1] * n_peaks
f_peak_prom = [-1] * n_peaks
temp = sorted(zip(peaks, props['prominences']), key=lambda tup: -tup[1])
for i, peak in enumerate(temp):
if i >= n_peaks: break
f_peaks[i] = peak[0]
f_peak_prom[i] = peak[1]
return f_peaks, f_peak_prom, dom_freq, num
def denoise_signal(x, wavelet='db4', level=1):
# Decompose to get the wavelet coefficients
coeff = pywt.wavedec(x, wavelet, mode="per")
# Calculate sigma for threshold as defined in http://dspace.vsb.cz/bitstream/handle/10084/133114/VAN431_FEI_P1807_1801V001_2018.pdf
# As noted by @harshit92 MAD referred to in the paper is Mean Absolute Deviation not Median Absolute Deviation
d = (coeff[-level])
sigma = (1 / 0.6745) * np.mean(np.absolute(d - np.mean(d)))
# Calculate the universal threshold
uthresh = sigma * np.sqrt(2 * np.log(len(x)))
coeff[1:] = (pywt.threshold(i, value=uthresh, mode='hard') for i in coeff[1:])
# Reconstruct the signal using the thresholded coefficients
return pywt.waverec(coeff, wavelet, mode='per')
def butter_lowpass(cutoff, fs, type='low', order=5):
nyq = 0.5 * fs
normal_cutoff = cutoff / nyq
b, a = sp.butter(order, normal_cutoff, btype=type, analog=False)
return b, a
def butter_lowpass_filter(data, cutoff, fs, type='low', order=5):
b, a = butter_lowpass(cutoff, fs, type, order=order)
y = sp.lfilter(b, a, data)
return y
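# Usage sketch (illustrative; `raw_segment` is a placeholder 1-D numpy array, not a
# name used in this repository): the constants above suggest a pipeline of wavelet
# denoising followed by a 140 kHz Butterworth low-pass filter on the 4 MHz signal.
#
#     denoised = denoise_signal(raw_segment, wavelet='db4', level=1)
#     lowpassed = butter_lowpass_filter(denoised, LOWPASS_FILTER_CUTOFF, SAMPLE_RATE)
#     spectrum = apply_fourier_transform(lowpassed)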
def add_prefix(prefix, features):
out_features = []
for f in features:
out_features.append((prefix + f[0], f[1]))
return out_features
def avg_mean_etc(prefix, X, add_quantiles):
feats = [
(prefix + "_mean", X.mean()),
(prefix + "_std", X.std()),
(prefix + "_skew", skew(X)),
(prefix + "_trend", trend(X)),
]
if add_quantiles:
feats.extend([
(prefix + "_q01", np.percentile(X, 0.01)),
(prefix + "_q05", np.percentile(X, 0.05)),
(prefix + "_q1", np.percentile(X, 0.1)),
(prefix + "_q2", np.percentile(X, 0.2)),
(prefix + "_q8", np.percentile(X, 0.8)),
(prefix + "_q9", np.percentile(X, 0.9)),
(prefix + "_q95", np.percentile(X, 0.95)),
(prefix + "_q99", np.percentile(X, 0.99)),
(prefix + "_q995", np.percentile(X, 0.995)),
])
return feats
def trend(X):
trend = np.polyfit(range(len(X)), X, deg=1)
return trend[0]
def librosa_features(X):
def mean_std_trend(name, x):
x = x.squeeze()
feats = [
(f"{name}_mean", np.mean(x)),
(f"{name}_std", np.std(x)),
(f"{name}_trend", trend(x))]
return feats
f_mfcc = librosa.feature.mfcc(y=X, sr=SAMPLE_RATE, n_mfcc=N_MFCC)
f_spec_cent = librosa.feature.spectral_centroid(y=X, sr=SAMPLE_RATE)
f_spec_bw = librosa.feature.spectral_bandwidth(y=X, sr=SAMPLE_RATE)
f_rolloff = librosa.feature.spectral_rolloff(y=X, sr=SAMPLE_RATE)
f_zero_crossing = librosa.feature.zero_crossing_rate(y=X)
f_chroma_stft = librosa.feature.chroma_stft(y=X, sr=SAMPLE_RATE)
f_rmse = librosa.feature.rmse(y=X)
feats = []
feats.extend(mean_std_trend("spectral_centroid", f_spec_cent))
feats.extend(mean_std_trend("spectral_bandwidth", f_spec_bw))
feats.extend(mean_std_trend("spectral_rolloff", f_rolloff))
feats.extend(mean_std_trend("zero_crossing", f_zero_crossing))
feats.extend(mean_std_trend("rmse", f_rmse))
for i, e in enumerate(f_chroma_stft):
feats.extend(mean_std_trend(f"chroma_stft_{i}", e))
for i, e in enumerate(f_mfcc):
feats.extend(mean_std_trend(f"mfcc_{i}", e))
return feats
def convert_sequence_to_features(sequence, n_reduce, signal_name):