
Preprocess Freesound data to train with Peltarion

In [1]:
import io
import os
from zipfile import ZipFile

import numpy as np
import pandas as pd
import librosa as lr
from tqdm import tqdm

os.listdir("../input")
Out[1]:
['train_noisy.csv',
 'test',
 'train_curated',
 'train_noisy',
 'sample_submission.csv',
 'train_curated.csv']
In [2]:
df = pd.read_csv('../input/train_curated.csv', index_col='fname')
df.index = df.index.str.replace('.wav', '.npy')
binary_indicators = df.labels.str.get_dummies(',')
binary_indicators.head()
Out[2]:
             Accelerating_and_revving_and_vroom  Accordion  ...  Yell  Zipper_(clothing)
fname                                                       ...
0006ae4e.npy                                  0          0  ...     0                  0
0019ef41.npy                                  0          0  ...     0                  0
001ec0ad.npy                                  0          0  ...     0                  0
0026c7cb.npy                                  0          0  ...     0                  0
0026f116.npy                                  0          0  ...     0                  0

5 rows × 80 columns (one binary column per label; each row has a 1 in the column(s) of that clip's labels)
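As a quick sanity check (a small sketch, not part of the original notebook, reusing the binary_indicators frame built above), you can confirm that the comma-separated labels really produce a multi-label matrix, i.e. some clips carry more than one label:

labels_per_clip = binary_indicators.sum(axis=1)
print(labels_per_clip.value_counts())              # how many clips have 1, 2, 3, ... labels
print(binary_indicators.sum(axis=0).nlargest(5))   # the most frequent classes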
In [20]:
from librosa.display import specshow


def preprocess(wavfile):

    # Load roughly 8 seconds of audio: 512*256 - 1 samples at 16 kHz,
    # a length chosen so the spectrogram below has exactly 256 frames.
    samples = 512*256 - 1
    samplerate = 16000
    waveform = lr.load(wavfile, sr=samplerate, duration=samples/samplerate)[0]

    # Loop too short audio clips.
    if len(waveform) < samples:
        waveform = np.pad(waveform, (0, samples - len(waveform)), mode='wrap')

    # Convert audio to log-mel spectrogram.
    spectrogram = lr.feature.melspectrogram(y=waveform, sr=samplerate, n_mels=256)
    spectrogram = lr.power_to_db(spectrogram)
    spectrogram = spectrogram.astype(np.float32)

    return spectrogram


# Preprocess one example clip and plot its log-mel spectrogram.
spectrogram = preprocess('../input/train_curated/0006ae4e.wav')
ax = specshow(spectrogram, sr=16000, y_axis='mel', x_axis='time')
ax.set_title('Example spectrogram')
spectrogram.shape
Out[20]:
(256, 256)
[Image output: log-mel spectrogram of the example clip, titled "Example spectrogram".]
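Why the shape comes out as 256 × 256: with librosa's default hop length of 512 and centre padding, the 512*256 - 1 samples loaded above yield exactly 256 STFT frames, and n_mels=256 gives 256 mel bands. A short check of that arithmetic (assuming those defaults):

hop_length = 512                     # librosa's default for melspectrogram
samples = 512 * 256 - 1              # 131071 samples, ~8.2 s at 16 kHz
frames = 1 + samples // hop_length   # centre-padded STFT: 1 + 255 = 256
print(frames)                        # 256 -> spectrogram shape (n_mels, frames) = (256, 256)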
In [ ]:
with ZipFile('dataset.zip', 'w') as z:

    # Write the one-hot label table as index.csv, which maps each .npy file
    # to its target labels.
    with io.StringIO() as f:
        binary_indicators.to_csv(f)
        z.writestr('index.csv', f.getvalue())

    # Preprocess every curated training clip and store the spectrogram as .npy.
    d = '../input/train_curated'
    for n in tqdm(os.listdir(d), desc='Zipping spectrograms'):
        wavfile = os.path.join(d, n)
        spectrogram = preprocess(wavfile)
        with io.BytesIO() as f:
            np.save(f, spectrogram)
            z.writestr(n.replace('.wav', '.npy'), f.getvalue())
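To check that the archive is what the Peltarion platform expects, a small verification sketch (not part of the original notebook) can re-open dataset.zip and read the index and one spectrogram back:

with ZipFile('dataset.zip') as z:
    index = pd.read_csv(io.BytesIO(z.read('index.csv')), index_col='fname')
    example = np.load(io.BytesIO(z.read(index.index[0])))
print(index.shape)                    # (number of clips, 80 label columns)
print(example.shape, example.dtype)   # expected: (256, 256) float32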
