# AudioValid / fourier-ondemand
							from __future__ import absolute_import
import os
import sys
import fnmatch
import numpy as np
from pydub import AudioSegment
from pydub.utils import audioop
from dejavu import wavio
from hashlib import sha1

# Sample rate (Hz) that pydub-decoded audio is converted to: half of 44.1 kHz.
DEFAULT_FS = 44100 // 2

# Python 3 has no ``xrange``; alias it to ``range`` so code written against
# the Python 2 name keeps working on both major versions.
if sys.version_info[0] >= 3:
    xrange = range

def unique_hash(filepath, blocksize=2**20):
    """ Return the upper-case hexadecimal SHA-1 digest of a file's contents,
    used to uniquely identify that file. Inspired by the MD5 version here:
    http://stackoverflow.com/a/1131255/712997

    The file is consumed `blocksize` bytes at a time, so large files never
    have to be held in memory all at once.
    """
    digest = sha1()
    with open(filepath, "rb") as stream:
        # Two-argument iter() keeps calling read() until it returns the
        # empty-bytes sentinel, i.e. until end of file.
        for block in iter(lambda: stream.read(blocksize), b""):
            digest.update(block)
    return digest.hexdigest().upper()


def find_files(path, extensions):
    """
    Walks `path` recursively and yields every file whose name ends in one
    of `extensions`.

    path:       root directory handed to os.walk
    extensions: iterable of extensions, each written with or without the
                leading dot (".mp3" and "mp3" are equivalent)

    yields: (filepath, extension) where `extension` is the matching entry
    without its leading dot. A file is yielded once per matching entry, so
    duplicate entries in `extensions` produce duplicate results.
    """
    # Only the *leading* dot is optional. Interior dots (as in "tar.gz") are
    # part of the extension; removing them would build a pattern such as
    # "*.targz" that can never match the intended files.
    extensions = [e.lstrip(".") for e in extensions]

    for dirpath, _dirnames, files in os.walk(path):
        for extension in extensions:
            for f in fnmatch.filter(files, "*.%s" % extension):
                yield (os.path.join(dirpath, f), extension)

def read_chunks(filename, chunk_size=1, start=0, fmt=None):
    """
    Decodes an audio file and lazily yields it in consecutive chunks.

    The file is decoded with pydub and converted to DEFAULT_FS. If pydub
    raises `audioop.error` (the 24-bit wav case), wavio is used as a backup.
    The backup path performs no resampling, so the yielded sample rate is
    the file's own rate as reported by wavio.

    filename:   path of the audio file
    chunk_size: length of every chunk, in seconds
    start:      position of the first chunk, in seconds from the file start
    fmt:        format hint forwarded to AudioSegment.from_file

    yields: (channels, samplerate, file_hash, chunk_length_ms)
        channels        - list with one int16 numpy array per channel
        samplerate      - sample rate of the yielded data, in Hz
        file_hash       - unique_hash(filename)
        chunk_length_ms - duration of this chunk in milliseconds (the last
                          chunk may be shorter than `chunk_size`)
    """
    start_ms = start * 1000
    chunk_ms = chunk_size * 1000
    filename_hash = unique_hash(filename)

    # Only decoding/resampling is guarded: that is where pydub fails on
    # unsupported input, and it keeps the yields outside the try block.
    try:
        audiofile = AudioSegment.from_file(filename, fmt)
        if audiofile.frame_rate != DEFAULT_FS:
            audiofile = audiofile.set_frame_rate(DEFAULT_FS)
    except audioop.error:
        audiofile = None

    if audiofile is None:
        # wavio.readwav returns (rate, sample width, ndarray). The array is
        # a plain numpy array, so it has no frame_rate/set_frame_rate and is
        # indexed by frame, not by millisecond.
        # NOTE(review): assumes shape (frames, channels), which is what the
        # transpose-then-iterate below relies on - confirm against wavio.
        fs, _, samples = wavio.readwav(filename)
        while True:
            # Translate the millisecond window into frame indices.
            first = int(start_ms * fs // 1000)
            last = int((start_ms + chunk_ms) * fs // 1000)
            frames = samples[first:last]
            if len(frames) == 0:
                return

            # NOTE(review): astype does not rescale samples wider than
            # 16 bits - confirm this is acceptable for 24-bit input.
            channels = list(frames.T.astype(np.int16))

            # Measure the duration before transposing: len() of the
            # transposed array would be the channel count.
            chunk_length_ms = int(round(1000.0 * len(frames) / fs))
            yield channels, fs, filename_hash, chunk_length_ms
            start_ms += chunk_ms

    while True:
        end_ms = start_ms + chunk_ms
        audio_chunk = audiofile[start_ms:end_ms]
        if len(audio_chunk) == 0:
            return

        # np.fromstring is deprecated for binary input; frombuffer gives a
        # read-only view, so copy to keep handing out writable arrays.
        data = np.frombuffer(audio_chunk._data, np.int16).copy()

        # Samples are interleaved per frame; stride by the channel count to
        # split them into one array per channel.
        n_channels = audio_chunk.channels
        channels = [data[chn::n_channels] for chn in range(n_channels)]

        yield channels, audio_chunk.frame_rate, filename_hash, len(audio_chunk)
        start_ms = end_ms
     

def read(filename, limit=None, fmt=None, offset=0):
    """
    Reads any file supported by pydub (ffmpeg) and returns the data contained
    within. If file reading fails due to input being a 24-bit wav file,
    wavio is used as a backup.

    Audio decoded by pydub is converted to DEFAULT_FS. The wavio backup does
    no resampling, so in that case the returned sample rate is the file's
    own rate as reported by wavio.

    filename: path of the audio file
    limit:    optional number of seconds to read, counted from `offset`;
              None or 0 means "up to the end of the file"
    fmt:      format hint forwarded to AudioSegment.from_file
    offset:   number of seconds to skip from the start of the file; it is
              honoured whether or not `limit` is given

    returns: (channels, samplerate, file_hash, length_ms)
        channels   - list with one int16 numpy array per channel
        samplerate - sample rate of the returned data, in Hz
        file_hash  - unique_hash(filename)
        length_ms  - duration of the returned audio in milliseconds
    """
    offset_ms = offset * 1000
    end_ms = (offset_ms + limit * 1000) if limit else None

    # pydub does not support 24-bit wav files, use wavio when this occurs
    try:
        audiofile = AudioSegment.from_file(filename, fmt)
        if audiofile.frame_rate != DEFAULT_FS:
            audiofile = audiofile.set_frame_rate(DEFAULT_FS)
    except audioop.error:
        audiofile = None

    if audiofile is not None:
        if offset_ms or end_ms is not None:
            stop_ms = len(audiofile) if end_ms is None else end_ms
            audiofile = audiofile[offset_ms:stop_ms]

        # np.fromstring is deprecated for binary input; frombuffer gives a
        # read-only view, so copy to keep handing out writable arrays.
        data = np.frombuffer(audiofile._data, np.int16).copy()

        # Samples are interleaved per frame; stride by the channel count to
        # split them into one array per channel.
        n_channels = audiofile.channels
        channels = [data[chn::n_channels] for chn in range(n_channels)]

        fs = audiofile.frame_rate
        length_ms = len(audiofile)
    else:
        # wavio.readwav returns (rate, sample width, ndarray). The array is
        # a plain numpy array, so it has no frame_rate/set_frame_rate and is
        # indexed by frame, not by millisecond.
        # NOTE(review): assumes shape (frames, channels), which is what the
        # transpose-then-iterate below relies on - confirm against wavio.
        fs, _, samples = wavio.readwav(filename)

        # Translate the millisecond window into frame indices.
        first = int(offset_ms * fs // 1000)
        last = None if end_ms is None else int(end_ms * fs // 1000)
        samples = samples[first:last]

        # NOTE(review): astype does not rescale samples wider than 16 bits -
        # confirm this is acceptable for 24-bit input.
        channels = list(samples.T.astype(np.int16))

        # Measure the duration before transposing: len() of the transposed
        # array would be the channel count.
        length_ms = int(round(1000.0 * len(samples) / fs))

    return channels, fs, unique_hash(filename), length_ms


def path_to_songname(path):
    """
    Derives a song name from a filepath: the final path component with its
    last extension removed. Used to identify which songs have already been
    fingerprinted on disk.
    """
    filename = os.path.basename(path)
    songname, _extension = os.path.splitext(filename)
    return songname