Quellcode durchsuchen

Dependencias para ejecutar en entorno docker

Rolando Valenzuela vor 4 Jahren
Ursprung
Commit
f5aa1b4360

+ 6 - 0
Dockerfile

@@ -0,0 +1,6 @@
FROM python:2.7.18-buster
# Run apt-get update and install in a single layer: a cached standalone
# `update` layer goes stale and later installs can 404 on old indexes.
# Clearing the apt lists afterwards keeps the image smaller.
RUN apt-get update -y \
 && apt-get install -y gcc portaudio19-dev python-setuptools ffmpeg tk \
 && rm -rf /var/lib/apt/lists/*
RUN python -m pip install --upgrade pip
RUN pip install cryptography numpy pydub matplotlib scipy tornado requests pyaudio firebase_admin psutil mutagen
#RUN python setup.py install

Datei-Diff unterdrückt, da er zu groß ist
+ 12 - 0
Fourier-key.json


+ 23 - 0
fourier-config.json

@@ -0,0 +1,23 @@
+{
+  "device_id":"prueba",
+  "basepath":"/var/fourier",
+  "bucket": "fourier-6e14d.appspot.com",
+  "projectId":"fourier-6e14d",
+  "keyFilename":"/etc/Fourier-key.json",
+  "installDir":"~/programs",
+  "apiSecret": "prueba",
+  "firebase": {
+    "apiKey": "AIzaSyAH7eXARukgBCLFOfw4-Kl-NatHc-fOA68",
+    "authDomain": "fourier-6e14d.firebaseapp.com",
+    "databaseURL": "https://fourier-6e14d.firebaseio.com",
+    "storageBucket": "fourier-6e14d.appspot.com",
+    "messagingSenderId": "1032228035742"
+  },
+  "receivers_count":16,
+  "database": {
+    "user": "fourier",
+    "password": "Sup3rDuper!", 
+    "database": "fourier"
+  },
+  "streamURL": "http://monitor.audiovalid.com:8000"
+}

+ 27 - 0
ondemand/boxconfig.py

@@ -0,0 +1,27 @@
+import json
+import os
+
+AZAP_PATH = os.path.expanduser('~/.azap')
+CHANNELS_PATH = os.path.join(AZAP_PATH, 'channels.conf')
+
def parse_config(filename='/code/fourier-config.json'):
    """Load the fourier JSON configuration file.

    filename: path to the JSON config (defaults to the location the
        docker image mounts it at).

    Returns the parsed dict.  When the file does not define
    'localDatabase', it is derived as <basepath>/<device_id>/files.db.
    """
    with open(filename, 'r') as fp:
        # json.load reads straight from the file object instead of
        # buffering the whole document through json.loads(fp.read()).
        the_config = json.load(fp)

    if 'localDatabase' not in the_config:
        the_config['localDatabase'] = os.path.join(
            the_config['basepath'],
            the_config['device_id'],
            'files.db'
        )
    return the_config
+
def prepare_channels(channels):
    """Write the azap channels.conf file for the given channel dicts.

    Each entry must provide the keys id, herz, modulation, videoID,
    audioID and serviceID.
    """
    if not os.path.isdir(AZAP_PATH):
        os.mkdir(AZAP_PATH)

    line_fmt = ('{id}:{herz}:{modulation}:'
                '{videoID}:{audioID}:{serviceID}\n')
    with open(CHANNELS_PATH, 'w') as conf_file:
        for entry in channels:
            conf_file.write(line_fmt.format(**entry))

+ 65 - 0
ondemand/client.py

@@ -0,0 +1,65 @@
+import requests
+
+ConnectionError = requests.exceptions.ConnectionError
+
class Client:
    """Thin HTTP client for the audiovalid REST API.

    Every request carries an ``Authorization: <key> <secret>`` header.
    """

    base_url = 'https://api.audiovalid.com'

    def __init__(self, key, secret, base_url=None):
        self.key = key
        self.secret = secret
        if base_url:
            self.base_url = base_url

    @property
    def headers(self):
        """Authorization header dict built from the key/secret pair."""
        return {'Authorization': '{} {}'.format(self.key, self.secret)}

    def put_schedule_results(self, sched_id, item_id,
                             results, found=None,
                             missing_files=0):
        """Upload recognition results for one schedule item."""
        endpoint = '{}/schedule/{}/item/{}/results'.format(
            self.base_url,
            sched_id,
            item_id,
        )
        payload = {
            'results': results,
            'found': found,
            # The API expects an integer; never forward a None.
            'missing_files': 0 if missing_files is None else missing_files,
        }
        response = requests.put(endpoint, json=payload, headers=self.headers)
        return response.json()

    def get_schedule_pending(self):
        """Return the pending schedule entries."""
        endpoint = '{}/schedule/pending'.format(self.base_url)
        return requests.get(endpoint, headers=self.headers).json()

    def get_channels(self):
        """ Return the list of TV channels """
        endpoint = '{}/channel'.format(self.base_url)
        return requests.get(endpoint, headers=self.headers).json()['channels']

    def get_calibrations(self, station=None):
        """ Return the calibration of a station in a box """
        endpoint = '{}/device'.format(self.base_url)
        query = {'what': 'calibrations'}
        if station:
            query['station'] = station

        response = requests.get(endpoint,
                                headers=self.headers,
                                params=query)
        return response.json()['calibrations']

+ 211 - 0
ondemand/dejavu/__init__.py

@@ -0,0 +1,211 @@
+from __future__ import absolute_import
+from dejavu.database import get_database, Database
+from dejavu import decoder
+from dejavu import fingerprint
+from pydub.exceptions import CouldntDecodeError
+import multiprocessing
+import os
+import traceback
+import sys
+
class Dejavu(object):
    """Audio fingerprinting front-end.

    Indexes ("fingerprints") audio files into a Database backend and
    matches sample fingerprints against the indexed songs.
    """

    SONG_ID = "id"
    SONG_NAME = 'name'
    CONFIDENCE = 'confidence'
    MATCH_TIME = 'match_time'
    OFFSET = 'offset'
    OFFSET_SECS = 'offset_seconds'

    def __init__(self, config):
        """config keys used: database_type, database, fingerprint_limit."""
        super(Dejavu, self).__init__()

        self.config = config

        # initialize db
        db_cls = get_database(config.get("database_type", None))

        self.db = db_cls(**config.get("database", {}))
        self.db.setup()

        # if we should limit seconds fingerprinted,
        # None|-1 means use entire track
        self.limit = self.config.get("fingerprint_limit", None)
        if self.limit == -1:  # for JSON compatibility
            self.limit = None
        self.get_fingerprinted_songs()

    def get_fingerprinted_songs(self):
        """Refresh the in-memory caches of already fingerprinted songs."""
        # get songs previously indexed
        self.songs = self.db.get_songs()
        self.songhashes_set = set()  # to know which ones we've computed before
        self.songs_dict = {}
        for song in self.songs:
            song_hash = song[Database.FIELD_FILE_SHA1]
            self.songhashes_set.add(song_hash)
            self.songs_dict[song_hash] = song

    def fingerprint_directory(self, path, extensions, nprocesses=None):
        """Fingerprint every not-yet-indexed file under `path` matching
        `extensions`, using a pool of `nprocesses` workers."""
        # Try to use the maximum amount of processes if not given.
        try:
            nprocesses = nprocesses or multiprocessing.cpu_count()
        except NotImplementedError:
            nprocesses = 1
        else:
            nprocesses = 1 if nprocesses <= 0 else nprocesses

        pool = multiprocessing.Pool(nprocesses)

        filenames_to_fingerprint = []
        for filename, _ in decoder.find_files(path, extensions):

            # don't refingerprint already fingerprinted files
            if decoder.unique_hash(filename) in self.songhashes_set:
                print("%s already fingerprinted, continuing..." % filename)
                continue

            filenames_to_fingerprint.append(filename)

        # Prepare _fingerprint_worker input
        worker_input = zip(filenames_to_fingerprint,
                           [self.limit] * len(filenames_to_fingerprint))

        # Send off our tasks
        iterator = pool.imap_unordered(_fingerprint_worker,
                                       worker_input)

        # Loop till we have all of them
        while True:
            try:
                # BUG FIX: _fingerprint_worker returns a 4-tuple
                # (song_name, length, hashes, file_hash); the previous
                # 3-value unpack always raised ValueError.  next() also
                # works on both Python 2 and 3, unlike iterator.next().
                song_name, length, hashes, file_hash = next(iterator)
            except multiprocessing.TimeoutError:
                continue
            except StopIteration:
                break
            except Exception:
                print("Failed fingerprinting")
                # Print traceback because we can't reraise it here
                traceback.print_exc(file=sys.stdout)
            else:
                # insert_song expects (name, length, file_hash), matching
                # the call in fingerprint_file below.
                sid = self.db.insert_song(song_name, length, file_hash)

                self.db.insert_hashes(sid, hashes)
                self.db.set_song_fingerprinted(sid)
                self.get_fingerprinted_songs()

        pool.close()
        pool.join()

    def fingerprint_file(self, filepath, song_name=None):
        """Fingerprint a single file.

        Returns (sid, length, hash_count, "") on success, or
        (-1, -1, -1, existing_name) when the file was already indexed.
        """
        songname = decoder.path_to_songname(filepath)
        song_hash = decoder.unique_hash(filepath)
        song_name = song_name or songname
        # don't refingerprint already fingerprinted files
        if song_hash in self.songhashes_set:
            song = self.songs_dict[song_hash]
            return -1, -1, -1, song["name"]  # song already fingerprinted
        else:
            song_name, length, hashes, file_hash = _fingerprint_worker(
                filepath,
                self.limit,
                song_name=song_name
            )
            sid = self.db.insert_song(song_name, length, file_hash)

            self.db.insert_hashes(sid, hashes)
            self.db.set_song_fingerprinted(sid)
            self.get_fingerprinted_songs()
            return sid, length, len(hashes), ""

    def find_matches(self, samples, Fs=fingerprint.DEFAULT_FS, ads_filter=None):
        """Fingerprint raw samples and yield matching DB entries."""
        hashes = fingerprint.fingerprint(samples, Fs=Fs)
        return self.db.return_matches(hashes, ads_filter)

    def align_matches(self, matches):
        """
            Finds hash matches that align in time with other matches and finds
            consensus about which hashes are "true" signal from the audio.

            Returns a dictionary with match information.
        """
        # align by diffs
        diff_counter = {}
        largest = 0
        largest_offset = 0
        largest_count = 0
        song_id = -1
        for tup in matches:
            sid, diff, offset = tup
            if diff not in diff_counter:
                diff_counter[diff] = {}
            if sid not in diff_counter[diff]:
                diff_counter[diff][sid] = 0
            diff_counter[diff][sid] += 1

            if diff_counter[diff][sid] > largest_count:
                largest = diff
                largest_offset = offset
                largest_count = diff_counter[diff][sid]
                song_id = sid

        # extract idenfication
        song = self.db.get_song_by_id(song_id)
        if song:
            # TODO: Clarify what `get_song_by_id` should return.
            songname = song.get(Dejavu.SONG_NAME, None)
        else:
            return None

        # return match info
        nseconds = round(float(largest) / fingerprint.DEFAULT_FS *
                         fingerprint.DEFAULT_WINDOW_SIZE *
                         fingerprint.DEFAULT_OVERLAP_RATIO, 5)
        song = {
            Dejavu.SONG_ID: song_id,
            Dejavu.SONG_NAME: songname,
            Dejavu.CONFIDENCE: largest_count,
            Dejavu.OFFSET: int(largest),
            Dejavu.OFFSET_SECS: nseconds,
            'position': largest_offset,
            Database.FIELD_FILE_SHA1: song.get(Database.FIELD_FILE_SHA1, None),
        }
        return song

    def recognize(self, recognizer, *options, **kwoptions):
        """Run the given recognizer class against this Dejavu instance."""
        r = recognizer(self)
        return r.recognize(*options, **kwoptions)
+
+
def _fingerprint_worker(filename, limit=None, song_name=None):
    """Decode one audio file and fingerprint every channel.

    Returns (song_name, length, hashes, file_hash) where `hashes` is
    the union set of fingerprints over all channels.
    """
    # Pool.imap delivers its arguments packed into a tuple; unpack
    # when that is what we received.
    try:
        filename, limit = filename
    except ValueError:
        pass

    default_name, _extension = os.path.splitext(os.path.basename(filename))
    song_name = song_name or default_name

    channels, Fs, file_hash, length = decoder.read(filename, limit)

    hashes = set()
    for channel in channels:
        hashes |= set(fingerprint.fingerprint(channel, Fs=Fs))

    return song_name, length, hashes, file_hash
+
+
def chunkify(lst, n):
    """
    Splits a list into roughly n equal parts.
    http://stackoverflow.com/questions/2130016/splitting-a-list-of-arbitrary-size-into-only-roughly-n-equal-parts

    Uses range() instead of the Python-2-only xrange() so the helper
    works under both interpreters (this package already carries
    Python 3 compatibility shims in database_mem.py); the result is
    identical on either version.
    """
    return [lst[i::n] for i in range(n)]

+ 175 - 0
ondemand/dejavu/database.py

@@ -0,0 +1,175 @@
+from __future__ import absolute_import
+import abc
+
+
class Database(object):
    """Abstract interface every fingerprint storage backend implements."""

    __metaclass__ = abc.ABCMeta

    # Canonical field/column names shared by every backend.
    FIELD_FILE_SHA1 = 'file_sha1'
    FIELD_SONG_ID = 'id'
    FIELD_SONGNAME = 'name'
    FIELD_LENGTH = 'length'
    FIELD_OFFSET = 'offset'
    FIELD_HASH = 'hash'

    # Name of your Database subclass, this is used in configuration
    # to refer to your class
    type = None

    def __init__(self):
        super(Database, self).__init__()

    def before_fork(self):
        """Hook run in the parent just before this instance is handed
        to a new process.  Default: no-op."""
        pass

    def after_fork(self):
        """Hook run inside the child process once it owns this
        instance.  Default: no-op."""
        pass

    def setup(self):
        """Hook run on creation (or shortly after) so the backend can
        prepare its storage.  Default: no-op."""
        pass

    @abc.abstractmethod
    def empty(self):
        """Wipe every record from the backing store."""
        pass

    @abc.abstractmethod
    def delete_unfingerprinted_songs(self):
        """Drop song entries that have no fingerprints associated with
        them."""
        pass

    @abc.abstractmethod
    def get_num_songs(self):
        """Return how many songs the store holds."""
        pass

    @abc.abstractmethod
    def get_num_fingerprints(self):
        """Return how many fingerprints the store holds."""
        pass

    @abc.abstractmethod
    def set_song_fingerprinted(self, sid):
        """Mark song `sid` as fully fingerprinted."""
        pass

    @abc.abstractmethod
    def get_songs(self):
        """Yield every fully fingerprinted song."""
        pass

    @abc.abstractmethod
    def get_song_by_id(self, sid):
        """Look a song up by its identifier `sid`."""
        pass

    @abc.abstractmethod
    def insert(self, hash, sid, offset):
        """Store one fingerprint row.

          hash: part of a sha1 hash, hexadecimal
           sid: song identifier the fingerprint belongs to
        offset: sample offset the hash was taken at
        """
        pass

    @abc.abstractmethod
    def insert_song(self, song_name):
        """Store a song record and return its new identifier.

        song_name: the name of the song.
        """
        pass

    @abc.abstractmethod
    def query(self, hash):
        """Yield every fingerprint entry matching `hash` (part of a
        sha1 hash, hexadecimal)."""
        pass

    @abc.abstractmethod
    def get_iterable_kv_pairs(self):
        """Yield every fingerprint in the store."""
        pass

    @abc.abstractmethod
    def insert_hashes(self, sid, hashes):
        """Bulk-store fingerprints for song `sid`.

        hashes: sequence of (hash, offset) tuples
        -   hash: part of a sha1 hash, hexadecimal
        - offset: sample offset the hash was taken at
        """
        pass

    @abc.abstractmethod
    def return_matches(self, hashes, ads_filter):
        """Search the store for the given (hash, offset) pairs.

        hashes: sequence of (hash, offset) tuples as in insert_hashes.

        Yields (sid, offset_difference) tuples where
        offset_difference = offset - database_offset.
        """
        pass
+
+
def get_database(database_type=None):
    """Resolve a Database subclass by its ``type`` string.

    database_type: the `type` attribute of a registered Database
        subclass, case-insensitive.  Falsy values (None, '') fall back
        to "mysql".

    Raises TypeError when no registered subclass matches.
    """
    # BUG FIX: the default used to be the *string* 'None', which is
    # truthy -- a bare get_database() then searched for a type named
    # "none" and raised instead of falling back to mysql.
    database_type = database_type or "mysql"
    # Lower all the input.
    database_type = database_type.lower()

    for db_cls in Database.__subclasses__():
        if db_cls.type == database_type:
            return db_cls

    raise TypeError("Unsupported database type supplied.")
+
+import dejavu.database_mem

+ 372 - 0
ondemand/dejavu/database_mem.py

@@ -0,0 +1,372 @@
+from __future__ import absolute_import
+from dejavu.database import Database
+import sys
+
+if sys.version_info >= (3, 0):
+    from itertools import zip_longest as izip_longest
+    import queue as Queue
+else:
+    from itertools import izip_longest
+    import Queue
+
+PY3 = sys.version_info >= (3, 0)
+
+
class MEMDatabase(Database):
    """
    In-memory Database backend: songs and fingerprints live in plain
    dicts, keyed by file hash and fingerprint hash respectively.

    NOTE(review): the SQL constants and the commented/unreachable
    cursor code below appear to be kept verbatim from the MySQL
    implementation this class was derived from; none of it executes.

    Queries:

    1) Find duplicates (shouldn't be any, though):

        select `hash`, `song_id`, `offset`, count(*) cnt
        from fingerprints
        group by `hash`, `song_id`, `offset`
        having cnt > 1
        order by cnt asc;

    2) Get number of hashes by song:

        select song_id, song_name, count(song_id) as num
        from fingerprints
        natural join songs
        group by song_id
        order by count(song_id) desc;

    3) get hashes with highest number of collisions

        select
            hash,
            count(distinct song_id) as n
        from fingerprints
        group by `hash`
        order by n DESC;

    => 26 different songs with same fingerprint (392 times):

        select songs.song_name, fingerprints.offset
        from fingerprints natural join songs
        where fingerprints.hash = "08d3c833b71c60a7b620322ac0c0aba7bf5a3e73";
    """

    type = "mem"

    # tables
    FINGERPRINTS_TABLENAME = "fingerprints"
    SONGS_TABLENAME = "ads"

    # fields
    FIELD_FINGERPRINTED = "fingerprinted"

    # creates
    # (legacy SQL, never executed by this in-memory backend)
    CREATE_FINGERPRINTS_TABLE = """
        CREATE TABLE IF NOT EXISTS `%s` (
             `%s` binary(10) not null,
             `%s` int unsigned not null,
             `%s` int unsigned not null,
         INDEX (%s),
         UNIQUE KEY `unique_constraint` (%s, %s, %s),
         FOREIGN KEY (%s) REFERENCES %s(%s) ON DELETE CASCADE
    ) ENGINE=INNODB;""" % (
        FINGERPRINTS_TABLENAME, Database.FIELD_HASH,
        Database.FIELD_SONG_ID, Database.FIELD_OFFSET, Database.FIELD_HASH,
        Database.FIELD_SONG_ID, Database.FIELD_OFFSET, Database.FIELD_HASH,
        Database.FIELD_SONG_ID, SONGS_TABLENAME, Database.FIELD_SONG_ID
    )

    CREATE_SONGS_TABLE = """
        CREATE TABLE IF NOT EXISTS `%s` (
            `%s` int unsigned not null auto_increment,
            `%s` varchar(250) not null,
            `%s` bigint default 0,
            `%s` tinyint default 0,
            `%s` binary(20) not null,
        PRIMARY KEY (`%s`),
        UNIQUE KEY `%s` (`%s`)
    ) ENGINE=INNODB;""" % (
        SONGS_TABLENAME, Database.FIELD_SONG_ID, Database.FIELD_SONGNAME, Database.FIELD_LENGTH, FIELD_FINGERPRINTED,
        Database.FIELD_FILE_SHA1,
        Database.FIELD_SONG_ID, Database.FIELD_SONG_ID, Database.FIELD_SONG_ID,
    )

    # inserts (ignores duplicates)
    INSERT_FINGERPRINT = """
        INSERT IGNORE INTO %s (%s, %s, %s) values
            (UNHEX(%%s), %%s, %%s);
    """ % (FINGERPRINTS_TABLENAME, Database.FIELD_HASH, Database.FIELD_SONG_ID, Database.FIELD_OFFSET)

    INSERT_SONG = "INSERT INTO %s (%s, %s, %s) values (%%s, %%s, UNHEX(%%s));" % (
        SONGS_TABLENAME, Database.FIELD_SONGNAME, Database.FIELD_LENGTH, Database.FIELD_FILE_SHA1)

    # selects
    SELECT = """
        SELECT %s, %s FROM %s WHERE %s = UNHEX(%%s);
    """ % (Database.FIELD_SONG_ID, Database.FIELD_OFFSET, FINGERPRINTS_TABLENAME, Database.FIELD_HASH)

    SELECT_MULTIPLE = """
        SELECT HEX(%s), %s, %s FROM %s WHERE %s IN (%%s);
    """ % (Database.FIELD_HASH, Database.FIELD_SONG_ID, Database.FIELD_OFFSET,
           FINGERPRINTS_TABLENAME, Database.FIELD_HASH)

    SELECT_MULTIPLE_FILTER = """
        SELECT HEX(%s), %s, %s FROM %s WHERE %s IN (%%s) AND id IN (%%s);
    """ % (Database.FIELD_HASH, Database.FIELD_SONG_ID, Database.FIELD_OFFSET,
           FINGERPRINTS_TABLENAME, Database.FIELD_HASH)

    SELECT_ALL = """
        SELECT %s, %s FROM %s;
    """ % (Database.FIELD_SONG_ID, Database.FIELD_OFFSET, FINGERPRINTS_TABLENAME)

    SELECT_SONG = """
        SELECT %s, HEX(%s) as %s FROM %s WHERE %s = %%s;
    """ % (Database.FIELD_SONGNAME, Database.FIELD_FILE_SHA1, Database.FIELD_FILE_SHA1, SONGS_TABLENAME, Database.FIELD_SONG_ID)

    SELECT_NUM_FINGERPRINTS = """
        SELECT COUNT(*) as n FROM %s
    """ % (FINGERPRINTS_TABLENAME)

    SELECT_UNIQUE_SONG_IDS = """
        SELECT COUNT(DISTINCT %s) as n FROM %s WHERE %s = 1;
    """ % (Database.FIELD_SONG_ID, SONGS_TABLENAME, FIELD_FINGERPRINTED)

    SELECT_SONGS = """
        SELECT %s, %s, HEX(%s) as %s FROM %s WHERE %s = 1;
    """ % (Database.FIELD_SONG_ID, Database.FIELD_SONGNAME, Database.FIELD_FILE_SHA1, Database.FIELD_FILE_SHA1,
           SONGS_TABLENAME, FIELD_FINGERPRINTED)

    # drops
    DROP_FINGERPRINTS = "DROP TABLE IF EXISTS %s;" % FINGERPRINTS_TABLENAME
    DROP_SONGS = "DROP TABLE IF EXISTS %s;" % SONGS_TABLENAME

    # update
    UPDATE_SONG_FINGERPRINTED = """
        UPDATE %s SET %s = 1 WHERE %s = %%s
    """ % (SONGS_TABLENAME, FIELD_FINGERPRINTED, Database.FIELD_SONG_ID)

    # delete
    DELETE_UNFINGERPRINTED = """
        DELETE FROM %s WHERE %s = 0;
    """ % (SONGS_TABLENAME, FIELD_FINGERPRINTED)

    def __init__(self, **options):
        # options are stored but only used by the (disabled) pickling
        # support in __getstate__/__setstate__.
        super(MEMDatabase, self).__init__()
        self._options = options
        

    def after_fork(self):
        # Intentionally unsupported for the in-memory backend; the
        # code after the raise is unreachable legacy SQL logic.
        raise Exception("not implemented")
        # Clear the cursor cache, we don't want any stale connections from
        # the previous process.
        Cursor.clear_cache()

    def setup(self):
        """
        Creates any non-existing tables required for dejavu to function.

        This also removes all songs that have been added but have no
        fingerprints associated with them.

        For this backend it simply resets the two in-memory dicts:
        songs (file_hash -> song record) and fingerprints
        (hash -> {sid, offset}).
        """
        self.songs = {}
        self.fingerprints = {}
        #with self.cursor() as cur:
        #    cur.execute(self.CREATE_SONGS_TABLE)
        #    cur.execute(self.CREATE_FINGERPRINTS_TABLE)
        #    cur.execute(self.DELETE_UNFINGERPRINTED)

    def empty(self):
        # Intentionally unsupported; everything below is unreachable.
        raise Exception("not implemented")
        """
        Drops tables created by dejavu and then creates them again
        by calling `SQLDatabase.setup`.

        .. warning:
            This will result in a loss of data
        """
        with self.cursor() as cur:
            cur.execute(self.DROP_FINGERPRINTS)
            cur.execute(self.DROP_SONGS)

        self.setup()

    def delete_unfingerprinted_songs(self):
        # Intentionally unsupported; everything below is unreachable.
        raise Exception("not implemented")
        """
        Removes all songs that have no fingerprints associated with them.
        """
        with self.cursor() as cur:
            cur.execute(self.DELETE_UNFINGERPRINTED)

    def get_num_songs(self):
        # Intentionally unsupported; everything below is unreachable.
        raise Exception("not implemented")
        """
        Returns number of songs the database has fingerprinted.
        """
        with self.cursor() as cur:
            cur.execute(self.SELECT_UNIQUE_SONG_IDS)

            for count, in cur:
                return count
            return 0

    def get_num_fingerprints(self):
        # Intentionally unsupported; everything below is unreachable.
        raise Exception("not implemented")
        """
        Returns number of fingerprints the database has fingerprinted.
        """
        with self.cursor() as cur:
            cur.execute(self.SELECT_NUM_FINGERPRINTS)

            for count, in cur:
                return count
            return 0

    def set_song_fingerprinted(self, sid):
        """
        Set the fingerprinted flag to TRUE (1) once a song has been completely
        fingerprinted in the database.
        """
        # sid is the file hash here -- see insert_song below.
        self.songs[sid]["fingerprinted"] = 1
        #with self.cursor() as cur:
        #    cur.execute(self.UPDATE_SONG_FINGERPRINTED, (sid,))

    def get_songs(self):
        """
        Return songs that have the fingerprinted flag set TRUE (1).

        NOTE(review): despite the docstring, this yields *every* song
        record regardless of its fingerprinted flag -- confirm intended.
        """
        if PY3:
            for _, song in self.songs.items():
                yield song
        else:
            # dict.iteritems() only exists on Python 2.
            for _, song in self.songs.iteritems():
                yield song
        #with self.cursor(cursor_type=DictCursor) as cur:
        #    cur.execute(self.SELECT_SONGS)
        #    for row in cur:
        #        yield row

    def get_song_by_id(self, sid):
        """
        Returns song by its ID.
        """
        return self.songs.get(sid, None)
        #with self.cursor(cursor_type=DictCursor) as cur:
        #    cur.execute(self.SELECT_SONG, (sid,))
        #    return cur.fetchone()

    def insert(self, hash, sid, offset):
        # Intentionally unsupported; everything below is unreachable.
        raise Exception("not implemented")
        """
        Insert a (sha1, song_id, offset) row into database.
        """
        with self.cursor() as cur:
            cur.execute(self.INSERT_FINGERPRINT, (hash, sid, offset))

    def insert_song(self, songname, length, file_hash):
        """
        Inserts song in the database and returns the ID of the inserted record.

        The file hash doubles as the song ID for this backend.
        """
        self.songs[file_hash] = {
                Database.FIELD_SONG_ID: file_hash,
                Database.FIELD_SONGNAME: songname,
                Database.FIELD_LENGTH: length,
                "fingerprinted": 0,
                Database.FIELD_FILE_SHA1: file_hash
                }
        return file_hash
        #with self.cursor() as cur:
        #    cur.execute(self.INSERT_SONG, (songname, length, file_hash))
        #    return cur.lastrowid

    def query(self, hash):
        # Intentionally unsupported; everything below is unreachable.
        raise Exception("not implemented")
        """
        Return all tuples associated with hash.

        If hash is None, returns all entries in the
        database (be careful with that one!).
        """
        # select all if no key
        query = self.SELECT_ALL if hash is None else self.SELECT

        with self.cursor() as cur:
            cur.execute(query)
            for sid, offset in cur:
                yield (sid, offset)

    def get_iterable_kv_pairs(self):
        # Intentionally unsupported; everything below is unreachable.
        raise Exception("not implemented")
        """
        Returns all tuples in database.
        """
        return self.query(None)

    def insert_hashes(self, sid, hashes):
        """
        Insert series of hash => song_id, offset
        values into the database.

        NOTE(review): a hash shared by two songs keeps only the last
        writer -- collisions silently overwrite.
        """
        #values = {}
        for hash, offset in hashes:
            self.fingerprints[hash] = {"sid": sid, "offset": offset}

        #self.fingerprints[sid] = values

        #values = []
        #for hash, offset in hashes:
        #    values.append((hash, sid, offset))

        #with self.cursor() as cur:
        #    for split_values in grouper(values, 1000):
        #        cur.executemany(self.INSERT_FINGERPRINT, split_values)

    def return_matches(self, hashes, ads_filter=None):
        """
        Return the (song_id, offset_diff) tuples associated with
        a list of (sha1, sample_offset) values.

        Actually yields 3-tuples (sid, offset_diff, db_offset); the
        `ads_filter` parameter is ignored by this backend (it only
        mattered for the SQL path below the bare return).
        """
        for hash, offset in hashes:
            el = self.fingerprints.get(hash, None)
            if el is not None:
                yield (el["sid"], el["offset"] - offset, el["offset"])

        return
        # Everything below is unreachable legacy SQL code.
        # Create a dictionary of hash => offset pairs for later lookups
        mapper = {}
        for hash, offset in hashes:
            mapper[hash.upper()] = offset

        # Get an iteratable of all the hashes we need
        values = mapper.keys()

        with self.cursor() as cur:
            for split_values in grouper(values, 1000):
                # Create our IN part of the query
                if ads_filter is None:
                    query = self.SELECT_MULTIPLE
                    query = query % ', '.join(['UNHEX(%s)'] * len(split_values))
                else:
                    query = self.SELECT_MULTIPLE_FILTER
                    query = query % (', '.join(['UNHEX(%s)'] * len(split_values)), ",".join(ads_filter))

                #if ads_filter:
                    #query = query + " and id in (" + ",".join(ads_filter) + ")"

                cur.execute(query, split_values)

                for hash, sid, offset in cur:
                    # (sid, db_offset - song_sampled_offset)
                    yield (sid, offset - mapper[hash], offset)

    def __getstate__(self):
        # Pickling deliberately disabled; code below is unreachable.
        raise Exception("not implemented")
        return (self._options,)

    def __setstate__(self, state):
        # Unpickling deliberately disabled; code below is unreachable.
        raise Exception("not implemented")
        self._options, = state
        self.cursor = cursor_factory(**self._options)
+
+
def grouper(iterable, n, fillvalue=None):
    """Yield the items of `iterable` in groups of size `n`, dropping
    falsy padding from the final short group."""
    chunks = izip_longest(*([iter(iterable)] * n), fillvalue=fillvalue)
    return (filter(None, chunk) for chunk in chunks)

+ 386 - 0
ondemand/dejavu/database_sql.py

@@ -0,0 +1,386 @@
+from __future__ import absolute_import
+from itertools import izip_longest
+import Queue
+
+import MySQLdb as mysql
+from MySQLdb.cursors import DictCursor
+
+from dejavu.database import Database
+
+
class SQLDatabase(Database):
    """
    Queries:

    1) Find duplicates (shouldn't be any, though):

        select `hash`, `song_id`, `offset`, count(*) cnt
        from fingerprints
        group by `hash`, `song_id`, `offset`
        having cnt > 1
        order by cnt asc;

    2) Get number of hashes by song:

        select song_id, song_name, count(song_id) as num
        from fingerprints
        natural join songs
        group by song_id
        order by count(song_id) desc;

    3) get hashes with highest number of collisions

        select
            hash,
            count(distinct song_id) as n
        from fingerprints
        group by `hash`
        order by n DESC;

    => 26 different songs with same fingerprint (392 times):

        select songs.song_name, fingerprints.offset
        from fingerprints natural join songs
        where fingerprints.hash = "08d3c833b71c60a7b620322ac0c0aba7bf5a3e73";
    """

    type = "mysql"

    # tables
    FINGERPRINTS_TABLENAME = "fingerprints"
    # NOTE: in this fork "songs" live in the `ads` table.
    SONGS_TABLENAME = "ads"

    # fields
    FIELD_FINGERPRINTED = "fingerprinted"

    # creates
    # Fingerprint rows: 10-byte binary hash, song id, sample offset.
    # The FK on song_id cascades deletes from the songs (`ads`) table.
    CREATE_FINGERPRINTS_TABLE = """
        CREATE TABLE IF NOT EXISTS `%s` (
             `%s` binary(10) not null,
             `%s` int unsigned not null,
             `%s` int unsigned not null,
         INDEX (%s),
         UNIQUE KEY `unique_constraint` (%s, %s, %s),
         FOREIGN KEY (%s) REFERENCES %s(%s) ON DELETE CASCADE
    ) ENGINE=INNODB;""" % (
        FINGERPRINTS_TABLENAME, Database.FIELD_HASH,
        Database.FIELD_SONG_ID, Database.FIELD_OFFSET, Database.FIELD_HASH,
        Database.FIELD_SONG_ID, Database.FIELD_OFFSET, Database.FIELD_HASH,
        Database.FIELD_SONG_ID, SONGS_TABLENAME, Database.FIELD_SONG_ID
    )

    # Song rows: auto-increment id, name, length, fingerprinted flag and a
    # 20-byte binary SHA-1 of the source file.
    CREATE_SONGS_TABLE = """
        CREATE TABLE IF NOT EXISTS `%s` (
            `%s` int unsigned not null auto_increment,
            `%s` varchar(250) not null,
            `%s` bigint default 0,
            `%s` tinyint default 0,
            `%s` binary(20) not null,
        PRIMARY KEY (`%s`),
        UNIQUE KEY `%s` (`%s`)
    ) ENGINE=INNODB;""" % (
        SONGS_TABLENAME, Database.FIELD_SONG_ID, Database.FIELD_SONGNAME, Database.FIELD_LENGTH, FIELD_FINGERPRINTED,
        Database.FIELD_FILE_SHA1,
        Database.FIELD_SONG_ID, Database.FIELD_SONG_ID, Database.FIELD_SONG_ID,
    )

    # inserts (ignores duplicates)
    INSERT_FINGERPRINT = """
        INSERT IGNORE INTO %s (%s, %s, %s) values
            (UNHEX(%%s), %%s, %%s);
    """ % (FINGERPRINTS_TABLENAME, Database.FIELD_HASH, Database.FIELD_SONG_ID, Database.FIELD_OFFSET)

    INSERT_SONG = "INSERT INTO %s (%s, %s, %s) values (%%s, %%s, UNHEX(%%s));" % (
        SONGS_TABLENAME, Database.FIELD_SONGNAME, Database.FIELD_LENGTH, Database.FIELD_FILE_SHA1)

    # selects
    SELECT = """
        SELECT %s, %s FROM %s WHERE %s = UNHEX(%%s);
    """ % (Database.FIELD_SONG_ID, Database.FIELD_OFFSET, FINGERPRINTS_TABLENAME, Database.FIELD_HASH)

    # The remaining %%s in the IN(...) clause is expanded at query time by
    # return_matches() to one UNHEX(%s) placeholder per hash.
    SELECT_MULTIPLE = """
        SELECT HEX(%s), %s, %s FROM %s WHERE %s IN (%%s);
    """ % (Database.FIELD_HASH, Database.FIELD_SONG_ID, Database.FIELD_OFFSET,
           FINGERPRINTS_TABLENAME, Database.FIELD_HASH)

    # Second %%s slot receives a comma-joined list of ad ids (see
    # return_matches); it is interpolated, not parameter-bound.
    SELECT_MULTIPLE_FILTER = """
        SELECT HEX(%s), %s, %s FROM %s WHERE %s IN (%%s) AND id IN (%%s);
    """ % (Database.FIELD_HASH, Database.FIELD_SONG_ID, Database.FIELD_OFFSET,
           FINGERPRINTS_TABLENAME, Database.FIELD_HASH)

    SELECT_ALL = """
        SELECT %s, %s FROM %s;
    """ % (Database.FIELD_SONG_ID, Database.FIELD_OFFSET, FINGERPRINTS_TABLENAME)

    SELECT_SONG = """
        SELECT %s, HEX(%s) as %s FROM %s WHERE %s = %%s;
    """ % (Database.FIELD_SONGNAME, Database.FIELD_FILE_SHA1, Database.FIELD_FILE_SHA1, SONGS_TABLENAME, Database.FIELD_SONG_ID)

    SELECT_NUM_FINGERPRINTS = """
        SELECT COUNT(*) as n FROM %s
    """ % (FINGERPRINTS_TABLENAME)

    SELECT_UNIQUE_SONG_IDS = """
        SELECT COUNT(DISTINCT %s) as n FROM %s WHERE %s = 1;
    """ % (Database.FIELD_SONG_ID, SONGS_TABLENAME, FIELD_FINGERPRINTED)

    SELECT_SONGS = """
        SELECT %s, %s, HEX(%s) as %s FROM %s WHERE %s = 1;
    """ % (Database.FIELD_SONG_ID, Database.FIELD_SONGNAME, Database.FIELD_FILE_SHA1, Database.FIELD_FILE_SHA1,
           SONGS_TABLENAME, FIELD_FINGERPRINTED)

    # drops
    DROP_FINGERPRINTS = "DROP TABLE IF EXISTS %s;" % FINGERPRINTS_TABLENAME
    DROP_SONGS = "DROP TABLE IF EXISTS %s;" % SONGS_TABLENAME

    # update
    UPDATE_SONG_FINGERPRINTED = """
        UPDATE %s SET %s = 1 WHERE %s = %%s
    """ % (SONGS_TABLENAME, FIELD_FINGERPRINTED, Database.FIELD_SONG_ID)

    # delete
    DELETE_UNFINGERPRINTED = """
        DELETE FROM %s WHERE %s = 0;
    """ % (SONGS_TABLENAME, FIELD_FINGERPRINTED)
    def __init__(self, **options):
        """
        :param options: MySQLdb connection keyword arguments (host, user,
            passwd, db, ...) applied to every pooled connection.
        """
        super(SQLDatabase, self).__init__()
        # Factory producing context-manager Cursor objects bound to options.
        self.cursor = cursor_factory(**options)
        self._options = options
+
    def after_fork(self):
        """Reset the connection pool after a process fork so the child does
        not reuse sockets owned by the parent process."""
        # Clear the cursor cache, we don't want any stale connections from
        # the previous process.
        Cursor.clear_cache()
+
    def setup(self):
        """
        Creates any non-existing tables required for dejavu to function.

        This also removes all songs that have been added but have no
        fingerprints associated with them.
        """
        with self.cursor() as cur:
            # Songs first: the fingerprints table declares a FK referencing it.
            cur.execute(self.CREATE_SONGS_TABLE)
            cur.execute(self.CREATE_FINGERPRINTS_TABLE)
            cur.execute(self.DELETE_UNFINGERPRINTED)
+
    def empty(self):
        """
        Drops tables created by dejavu and then creates them again
        by calling `SQLDatabase.setup`.

        .. warning:
            This will result in a loss of data
        """
        with self.cursor() as cur:
            # Drop fingerprints first so the FK constraint cannot block the
            # drop of the songs table.
            cur.execute(self.DROP_FINGERPRINTS)
            cur.execute(self.DROP_SONGS)

        self.setup()
+
    def delete_unfingerprinted_songs(self):
        """
        Removes all songs that have no fingerprints associated with them
        (rows whose `fingerprinted` flag is still 0).
        """
        with self.cursor() as cur:
            cur.execute(self.DELETE_UNFINGERPRINTED)
+
+    def get_num_songs(self):
+        """
+        Returns number of songs the database has fingerprinted.
+        """
+        with self.cursor() as cur:
+            cur.execute(self.SELECT_UNIQUE_SONG_IDS)
+
+            for count, in cur:
+                return count
+            return 0
+
+    def get_num_fingerprints(self):
+        """
+        Returns number of fingerprints the database has fingerprinted.
+        """
+        with self.cursor() as cur:
+            cur.execute(self.SELECT_NUM_FINGERPRINTS)
+
+            for count, in cur:
+                return count
+            return 0
+
    def set_song_fingerprinted(self, sid):
        """
        Set the fingerprinted flag to TRUE (1) once a song has been completely
        fingerprinted in the database.

        :param sid: id of the song to mark as fully fingerprinted.
        """
        with self.cursor() as cur:
            cur.execute(self.UPDATE_SONG_FINGERPRINTED, (sid,))
+
    def get_songs(self):
        """
        Return songs that have the fingerprinted flag set TRUE (1).

        Yields one dict per song (rows come from a DictCursor).
        """
        with self.cursor(cursor_type=DictCursor) as cur:
            cur.execute(self.SELECT_SONGS)
            for row in cur:
                yield row
+
    def get_song_by_id(self, sid):
        """
        Returns song by its ID.

        :param sid: song primary key.
        :return: dict row (name + hex file SHA-1), or None when not found.
        """
        with self.cursor(cursor_type=DictCursor) as cur:
            cur.execute(self.SELECT_SONG, (sid,))
            return cur.fetchone()
+
    def insert(self, hash, sid, offset):
        """
        Insert a (sha1, song_id, offset) row into database.

        Duplicate rows are silently skipped (INSERT IGNORE).
        """
        with self.cursor() as cur:
            cur.execute(self.INSERT_FINGERPRINT, (hash, sid, offset))
+
    def insert_song(self, songname, length, file_hash):
        """
        Inserts song in the database and returns the ID of the inserted record.

        :param songname: display name stored for the song.
        :param length: song length value stored alongside the name.
        :param file_hash: hex SHA-1 of the file (stored binary via UNHEX()).
        """
        with self.cursor() as cur:
            cur.execute(self.INSERT_SONG, (songname, length, file_hash))
            return cur.lastrowid
+
+    def query(self, hash):
+        """
+        Return all tuples associated with hash.
+
+        If hash is None, returns all entries in the
+        database (be careful with that one!).
+        """
+        # select all if no key
+        query = self.SELECT_ALL if hash is None else self.SELECT
+
+        with self.cursor() as cur:
+            cur.execute(query)
+            for sid, offset in cur:
+                yield (sid, offset)
+
    def get_iterable_kv_pairs(self):
        """
        Returns all tuples in database.

        Thin wrapper over query(None), i.e. a full fingerprint table scan.
        """
        return self.query(None)
+
+    def insert_hashes(self, sid, hashes):
+        """
+        Insert series of hash => song_id, offset
+        values into the database.
+        """
+        values = []
+        for hash, offset in hashes:
+            values.append((hash, sid, offset))
+
+        with self.cursor() as cur:
+            for split_values in grouper(values, 1000):
+                cur.executemany(self.INSERT_FINGERPRINT, split_values)
+
    def return_matches(self, hashes, ads_filter=None):
        """
        Return the (song_id, offset_diff, db_offset) tuples associated with
        a list of (sha1, sample_offset) values.

        :param hashes: iterable of (hex_hash, sample_offset) pairs.
        :param ads_filter: optional iterable of ad id strings; when given the
            match is restricted to those ids via SELECT_MULTIPLE_FILTER.
        """
        # Create a dictionary of hash => offset pairs for later lookups
        mapper = {}
        for hash, offset in hashes:
            mapper[hash.upper()] = offset

        # Get an iteratable of all the hashes we need
        values = mapper.keys()

        with self.cursor() as cur:
            # Query in batches of 1000 hashes to bound statement size.
            for split_values in grouper(values, 1000):
                # Create our IN part of the query
                if ads_filter is None:
                    query = self.SELECT_MULTIPLE
                    query = query % ', '.join(['UNHEX(%s)'] * len(split_values))
                else:
                    query = self.SELECT_MULTIPLE_FILTER
                    # NOTE(review): ad ids are interpolated directly into the
                    # SQL text rather than bound as parameters -- safe only if
                    # ads_filter values are trusted/numeric; verify callers.
                    query = query % (', '.join(['UNHEX(%s)'] * len(split_values)), ",".join(ads_filter))

                cur.execute(query, split_values)

                for hash, sid, offset in cur:
                    # (sid, db_offset - song_sampled_offset)
                    yield (sid, offset - mapper[hash], offset)
+
    def __getstate__(self):
        # Pickle only the connection options; live connections/cursors are
        # not picklable and are rebuilt in __setstate__.
        return (self._options,)
+
    def __setstate__(self, state):
        # Restore the options and rebuild the cursor factory so fresh
        # connections are opened in the unpickling process.
        self._options, = state
        self.cursor = cursor_factory(**self._options)
+
+
def grouper(iterable, n, fillvalue=None):
    """Yield *iterable* in chunks of *n* items, filtering out the falsy
    fill values padding the last chunk."""
    iterators = [iter(iterable)] * n
    for chunk in izip_longest(fillvalue=fillvalue, *iterators):
        yield filter(None, chunk)
+
+
def cursor_factory(**factory_options):
    """Return a callable that builds Cursor objects; *factory_options*
    (connection settings) take precedence over per-call options."""
    def build_cursor(**call_options):
        merged = dict(call_options)
        merged.update(factory_options)
        return Cursor(**merged)
    return build_cursor
+
+
class Cursor(object):
    """
    Establishes a connection to the database and returns an open cursor.

    Connections are pooled in a small queue so repeated uses reuse live
    connections instead of reconnecting every time.

    ```python
    # Use as context manager
    with Cursor() as cur:
        cur.execute(query)
    ```
    """
    _cache = Queue.Queue(maxsize=5)

    def __init__(self, cursor_type=mysql.cursors.Cursor, **options):
        """
        :param cursor_type: MySQLdb cursor class (e.g. DictCursor).
        :param options: MySQLdb connection keyword arguments.
        """
        super(Cursor, self).__init__()

        try:
            # Reuse a pooled connection when one is available.
            conn = self._cache.get_nowait()
        except Queue.Empty:
            conn = mysql.connect(**options)
        else:
            # Ping the connection before using it from the cache.
            conn.ping(True)

        self.conn = conn
        self.conn.autocommit(False)
        self.cursor_type = cursor_type

    @classmethod
    def clear_cache(cls):
        """Discard all pooled connections (e.g. after a fork)."""
        cls._cache = Queue.Queue(maxsize=5)

    def __enter__(self):
        self.cursor = self.conn.cursor(self.cursor_type)
        return self.cursor

    def __exit__(self, extype, exvalue, traceback):
        # Bug fixes: the original tested `extype is mysql.MySQLError`, which
        # is never true for the subclasses actually raised (OperationalError,
        # IntegrityError, ...), and called rollback() on the *cursor*, which
        # has no such method -- transactions belong to the connection.
        if extype is not None and issubclass(extype, mysql.MySQLError):
            self.conn.rollback()

        self.cursor.close()
        self.conn.commit()

        # Put it back on the queue, or close it if the pool is full.
        try:
            self._cache.put_nowait(self.conn)
        except Queue.Full:
            self.conn.close()

+ 149 - 0
ondemand/dejavu/decoder.py

@@ -0,0 +1,149 @@
+from __future__ import absolute_import
+import os
+import sys
+import fnmatch
+import numpy as np
+from pydub import AudioSegment
+from pydub.utils import audioop
+from dejavu import wavio
+from hashlib import sha1
+
+DEFAULT_FS = int(44100 / 2)
+
+if sys.version_info >= (3, 0):
+    xrange = range
+
def unique_hash(filepath, blocksize=2**20):
    """
    Return the uppercase SHA-1 hex digest of the file at *filepath*.

    Reads in *blocksize* chunks so arbitrarily large files can be hashed
    without loading them fully into memory.
    """
    digest = sha1()
    with open(filepath, "rb") as source:
        for chunk in iter(lambda: source.read(blocksize), b""):
            digest.update(chunk)
    return digest.hexdigest().upper()
+
+
def find_files(path, extensions):
    """
    Recursively yield (filepath, extension) pairs under *path* for every
    file matching one of *extensions* (given with or without a leading dot;
    the yielded extension never carries the dot).
    """
    suffixes = [ext.replace(".", "") for ext in extensions]
    for dirpath, _dirnames, filenames in os.walk(path):
        for suffix in suffixes:
            for name in fnmatch.filter(filenames, "*.%s" % suffix):
                yield (os.path.join(dirpath, name), suffix)
+
def read_chunks(filename, chunk_size = 1, start = 0, fmt = None):
    """
    Decode *filename* and yield it in consecutive chunks.

    :param chunk_size: chunk length in seconds.
    :param start: start position in seconds.
    :param fmt: explicit pydub format hint (e.g. "mp3"), or None to guess.

    Yields (channels, frame_rate, file_sha1, chunk_length_ms) per chunk.
    """
    # pydub slicing is millisecond-based, so convert seconds up front.
    start = start * 1000
    chunk_size = chunk_size * 1000
    filename_hash = unique_hash(filename)
    try:
        audiofile = AudioSegment.from_file(filename,fmt)

        # Resample so fingerprints are comparable across sources.
        if audiofile.frame_rate != DEFAULT_FS:
            audiofile = audiofile.set_frame_rate(DEFAULT_FS)
        while True:
            end = start + chunk_size
            audio_chunk = audiofile[start:end]
            if len(audio_chunk) == 0:
                return;

            data = np.fromstring(audio_chunk._data, np.int16)

            # De-interleave the raw samples into one array per channel.
            channels = []
            for chn in xrange(audio_chunk.channels):
                channels.append(data[chn::audio_chunk.channels])

            yield channels, audio_chunk.frame_rate,filename_hash, len(audio_chunk)
            start = end
    
    except audioop.error:
        # Fallback for 24-bit wav files, which pydub cannot decode.
        fs, _, audiofile = wavio.readwav(filename)
        
        # NOTE(review): wavio.readwav returns a numpy array, which has no
        # frame_rate/set_frame_rate attributes -- this branch looks like it
        # would raise AttributeError; confirm against the wavio API.
        if audiofile.frame_rate != DEFAULT_FS:
            audiofile = audiofile.set_frame_rate(DEFAULT_FS)    

        while True:
            end = start + chunk_size
            audio_chunk = audiofile[start:end]
            if len(audio_chunk) == 0:
                return;

            audio_chunk = audio_chunk.T
            audio_chunk = audio_chunk.astype(np.int16)

            channels = []
            for chn in audio_chunk:
                channels.append(chn)

            yield channels, audio_chunk.frame_rate, filename_hash, len(audio_chunk)
            start = end
+     
+
+
+
def read(filename, limit=None, fmt = None, offset = 0):
    """
    Reads any file supported by pydub (ffmpeg) and returns the data contained
    within. If file reading fails due to input being a 24-bit wav file,
    wavio is used as a backup.

    Can be optionally limited to a certain amount of seconds from the start
    of the file by specifying the `limit` parameter. This is the amount of
    seconds from the start of the file.

    :param fmt: explicit pydub format hint, or None to guess from the file.
    :param offset: start position in seconds (only applied when limit is set).

    returns: (channels, samplerate, file_sha1, length_ms)
    """
    if limit:
        # pydub slices in milliseconds; limit becomes an absolute end point.
        offset = offset * 1000
        limit = offset + limit * 1000
    # pydub does not support 24-bit wav files, use wavio when this occurs
    try:
        audiofile = AudioSegment.from_file(filename,fmt)

        # Resample so fingerprints are comparable across sources.
        if audiofile.frame_rate != DEFAULT_FS:
            audiofile = audiofile.set_frame_rate(DEFAULT_FS)

        if limit:
            audiofile = audiofile[offset:limit]

        data = np.fromstring(audiofile._data, np.int16)

        # De-interleave the raw samples into one array per channel.
        channels = []
        for chn in xrange(audiofile.channels):
            channels.append(data[chn::audiofile.channels])

        fs = audiofile.frame_rate
    except audioop.error:
        fs, _, audiofile = wavio.readwav(filename)
        
        # NOTE(review): wavio.readwav returns a numpy array, which has no
        # frame_rate/set_frame_rate attributes -- this fallback looks broken
        # (and the final return also reads audiofile.frame_rate); confirm
        # against the wavio API.
        if audiofile.frame_rate != DEFAULT_FS:
            audiofile = audiofile.set_frame_rate(DEFAULT_FS)    


        if limit:
            audiofile = audiofile[offset:limit]

        audiofile = audiofile.T
        audiofile = audiofile.astype(np.int16)

        channels = []
        for chn in audiofile:
            channels.append(chn)

    return channels, audiofile.frame_rate, unique_hash(filename), len(audiofile)
+
+
def path_to_songname(path):
    """
    Extract the song name (file name without directory or extension) from a
    filesystem path; used to identify songs already fingerprinted on disk.
    """
    filename = os.path.basename(path)
    songname, _extension = os.path.splitext(filename)
    return songname

+ 18 - 0
ondemand/dejavu/example.py

@@ -0,0 +1,18 @@
+from __future__ import absolute_import
+import warnings
+import json
+warnings.filterwarnings("ignore")
+
+from fourier.dejavu import Dejavu
+from fourier.dejavu.frecognize import FileRecognizer, MicrophoneRecognizer, StdinRecognizer
+
# Load config from a JSON file (or anything producing a python dictionary).
with open("fourier.cnf") as f:
    config = json.load(f)

if __name__ == '__main__':
    # Bug fix: the original mixed tab and 8-space indentation in this block
    # (a TabError on Python 3 and fragile on Python 2); normalized to spaces.
    # Create a Dejavu instance and recognize audio piped on stdin.
    djv = Dejavu(config)
    djv.recognize(StdinRecognizer)

+ 170 - 0
ondemand/dejavu/fingerprint.py

@@ -0,0 +1,170 @@
+import sys
+import numpy as np
+import matplotlib.mlab as mlab
+import matplotlib.pyplot as plt
+from scipy.ndimage.filters import maximum_filter
+from scipy.ndimage.morphology import (generate_binary_structure,
+                                      iterate_structure, binary_erosion)
+import hashlib
+from operator import itemgetter
+
# True when running under Python 3; selects sort/hash code paths below.
PY3 = sys.version_info >= (3, 0)

# Tuple indices into a peak: (frequency_index, time_index).
IDX_FREQ_I = 0
IDX_TIME_J = 1

######################################################################
# Sampling rate, related to the Nyquist conditions, which affects
# the range frequencies we can detect.
DEFAULT_FS = int(44100 / 2)

######################################################################
# Size of the FFT window, affects frequency granularity
DEFAULT_WINDOW_SIZE = 4096

######################################################################
# Ratio by which each sequential window overlaps the last and the
# next window. Higher overlap will allow a higher granularity of offset
# matching, but potentially more fingerprints.
DEFAULT_OVERLAP_RATIO = 0.4 #0.75 better accuracy much slower, default 0.5

######################################################################
# Degree to which a fingerprint can be paired with its neighbors --
# higher will cause more fingerprints, but potentially better accuracy.
DEFAULT_FAN_VALUE = 15 #30 twice as many fingerprints, not much accuracy

######################################################################
# Minimum amplitude in spectrogram in order to be considered a peak.
# This can be raised to reduce number of fingerprints, but can negatively
# affect accuracy.
DEFAULT_AMP_MIN = 10 #/2 useless

######################################################################
# Number of cells around an amplitude peak in the spectrogram in order
# for Dejavu to consider it a spectral peak. Higher values mean less
# fingerprints and faster matching, but can potentially affect accuracy.
PEAK_NEIGHBORHOOD_SIZE = 10 #10 to augment fingerprints 3x and improve accuracy, default 20

######################################################################
# Thresholds on how close or far fingerprints can be in time in order
# to be paired as a fingerprint. If your max is too low, higher values of
# DEFAULT_FAN_VALUE may not perform as expected.
MIN_HASH_TIME_DELTA = 0
MAX_HASH_TIME_DELTA = 200

######################################################################
# If True, will sort peaks temporally for fingerprinting;
# not sorting will cut down number of fingerprints, but potentially
# affect performance.
PEAK_SORT = True

######################################################################
# Number of bits to throw away from the front of the SHA1 hash in the
# fingerprint calculation. The more you throw away, the less storage, but
# potentially higher collisions and misclassifications when identifying songs.
FINGERPRINT_REDUCTION = 20 #1 for much less storage, leave at 20, don't change
+
def fingerprint(channel_samples, Fs=DEFAULT_FS,
                wsize=DEFAULT_WINDOW_SIZE,
                wratio=DEFAULT_OVERLAP_RATIO,
                fan_value=DEFAULT_FAN_VALUE,
                amp_min=DEFAULT_AMP_MIN):
    """
    FFT the channel, log transform output, find local maxima, then return
    locally sensitive hashes.

    :param channel_samples: 1-D sequence of PCM samples for one channel.
    :param Fs: sample rate of the channel data.
    :param wsize: FFT window size (frequency granularity).
    :param wratio: overlap ratio between consecutive windows.
    :param fan_value: how many neighbor peaks each peak is paired with.
    :param amp_min: minimum spectrogram amplitude to count as a peak.
    :return: generator of (hash_hexprefix, time_offset) pairs.
    """
    # FFT the signal and extract frequency components
    arr2D = mlab.specgram(
        channel_samples,
        NFFT=wsize,
        Fs=Fs,
        window=mlab.window_hanning,
        noverlap=int(wsize * wratio))[0]

    # apply log transform since specgram() returns linear array
    arr2D = 10 * np.log10(arr2D)
    arr2D[arr2D == -np.inf] = 0  # replace infs with zeros

    # find local maxima
    local_maxima = get_2D_peaks(arr2D, plot=False, amp_min=amp_min)

    # return hashes
    return generate_hashes(local_maxima, fan_value=fan_value)
+
+
def get_2D_peaks(arr2D, plot=False, amp_min=DEFAULT_AMP_MIN):
    """
    Locate local amplitude peaks in a spectrogram.

    :param arr2D: 2-D spectrogram array (frequency x time).
    :param plot: when True, show a scatter plot of the detected peaks.
    :param amp_min: minimum amplitude for a maximum to count as a peak.
    :return: pairs of (frequency_idx, time_idx) for each peak.
    """
    # http://docs.scipy.org/doc/scipy/reference/generated/scipy.ndimage.morphology.iterate_structure.html#scipy.ndimage.morphology.iterate_structure
    struct = generate_binary_structure(2, 1)
    neighborhood = iterate_structure(struct, PEAK_NEIGHBORHOOD_SIZE)

    # find local maxima using our fliter shape
    local_max = maximum_filter(arr2D, footprint=neighborhood) == arr2D
    background = (arr2D == 0)
    eroded_background = binary_erosion(background, structure=neighborhood,
                                       border_value=1)

    # Boolean mask of arr2D with True at peaks: XOR removes the zero-valued
    # background cells (which are trivially "local maxima") from the mask.
    detected_peaks = local_max ^ eroded_background

    # extract peaks
    amps = arr2D[detected_peaks]
    j, i = np.where(detected_peaks)

    # filter peaks below the amplitude threshold
    amps = amps.flatten()
    peaks = zip(i, j, amps)
    peaks_filtered = [x for x in peaks if x[2] > amp_min]  # freq, time, amp

    # get indices for frequency and time
    frequency_idx = [x[1] for x in peaks_filtered]
    time_idx = [x[0] for x in peaks_filtered]

    if plot:
        # scatter of the peaks
        fig, ax = plt.subplots()
        ax.imshow(arr2D)
        ax.scatter(time_idx, frequency_idx)
        ax.set_xlabel('Time')
        ax.set_ylabel('Frequency')
        ax.set_title("Spectrogram")
        plt.gca().invert_yaxis()
        plt.show()

    return zip(frequency_idx, time_idx)
+
+
def generate_hashes(peaks, fan_value=DEFAULT_FAN_VALUE):
    """
    Hash list structure:
       sha1_hash[0:20]    time_offset
    [(e05b341a9b77a51fd26, 32), ... ]

    Pairs each peak with up to *fan_value* later peaks and hashes the
    (freq1, freq2, time_delta) triple; yields (hash_prefix, t1) pairs.
    """
    if PEAK_SORT:
        if PY3:
            peaks = sorted(peaks, key=itemgetter(1))
        else:
            # NOTE: Python 2 branch sorts the caller's peak list in place.
            peaks.sort(key=itemgetter(1))

    for i in range(len(peaks)):
        for j in range(1, fan_value):
            if (i + j) < len(peaks):
                
                freq1 = peaks[i][IDX_FREQ_I]
                freq2 = peaks[i + j][IDX_FREQ_I]
                t1 = peaks[i][IDX_TIME_J]
                t2 = peaks[i + j][IDX_TIME_J]
                t_delta = t2 - t1

                # Only pair peaks that are close enough in time.
                if t_delta >= MIN_HASH_TIME_DELTA and t_delta <= MAX_HASH_TIME_DELTA:
                    if PY3:
                        h = hashlib.sha1(
                            ("%s|%s|%s" % (
                                str(freq1),
                                str(freq2),
                                str(t_delta)
                            )).encode()
                        )
                    else:
                        h = hashlib.sha1(
                            "%s|%s|%s" % (str(freq1), str(freq2), str(t_delta)))
                    # Truncate the digest to save storage (see
                    # FINGERPRINT_REDUCTION).
                    yield (h.hexdigest()[0:FINGERPRINT_REDUCTION], t1)

+ 187 - 0
ondemand/dejavu/recognize.py

@@ -0,0 +1,187 @@
+from __future__ import absolute_import
+import dejavu.fingerprint as fingerprint
+import dejavu.decoder as decoder
+import numpy as np
+import pyaudio
+import time
+import sys
+import os
+import struct
+import json
+
+if sys.version_info > (3, 0):
+    from itertools import zip_longest as izip_longest
+else:
+    from itertools import izip_longest
+
class BaseRecognizer(object):
    """Common plumbing for recognizers: holds the Dejavu instance and runs
    per-channel audio data through find_matches/align_matches."""

    def __init__(self, dejavu):
        self.dejavu = dejavu
        # Sample rate assumed for incoming audio data.
        self.Fs = fingerprint.DEFAULT_FS

    def _recognize(self, ads_filter, *data):
        # One positional arg per audio channel; matches from every channel
        # are pooled before alignment.
        matches = []
        for d in data:
            matches.extend(self.dejavu.find_matches(d, Fs=self.Fs, ads_filter=ads_filter))
        return self.dejavu.align_matches(matches)

    def recognize(self):
        pass  # base class does nothing
+
+
class FileRecognizer(BaseRecognizer):
    """Recognizes a whole audio file in a single pass."""

    def __init__(self, dejavu):
        super(FileRecognizer, self).__init__(dejavu)

    def recognize_file(self, filename):
        """
        Decode *filename* and match it against the fingerprint database.

        Returns the match dict augmented with 'match_time' and 'length',
        or the falsy non-match result unchanged.
        """
        frames, self.Fs, file_hash, length = decoder.read(filename, self.dejavu.limit)

        t = time.time()
        # Bug fix: _recognize signature is (ads_filter, *data), so the filter
        # must be passed explicitly -- the original passed *frames directly,
        # making the first audio channel be consumed as the ads filter.
        match = self._recognize(None, *frames)
        t = time.time() - t

        if match:
            match['match_time'] = t
            match['length'] = length

        return match

    def recognize(self, filename):
        return self.recognize_file(filename)
+
class FilePerSecondRecognizer(BaseRecognizer):
    """Recognizes an audio file chunk-by-chunk, yielding one match dict per
    *seconds*-sized chunk."""

    def __init__(self, dejavu):
        super(FilePerSecondRecognizer, self).__init__(dejavu)

    def recognize(self, file_path, seconds = 1, callback = None, ads_filter = None):
        """
        Iterate over *file_path* in chunks of *seconds* and yield a match
        dict per chunk; non-matches yield {'confidence': 0, 'offset': 0}.
        Every yielded dict carries 'length' (chunk length in ms).
        """
        for frames, _, _, length in decoder.read_chunks(file_path, chunk_size = seconds):
            if length == 0:
                break

            t0 = time.time()
            match = self._recognize(ads_filter, *frames)
            t0 = time.time() - t0
            if match:
                # Bug fix: store the elapsed recognition time; the original
                # assigned the match dict to its own 'match_time' key.
                match["match_time"] = t0
            else:
                match = {
                    'confidence': 0,
                    'offset': 0
                }

            match['length'] = length
            yield match
+ 
class StdinRecognizer(BaseRecognizer):
    """Buffers an MP3 stream from stdin into temp-file slices and
    recognizes each slice."""

    def __init__(self, dejavu):
        super(StdinRecognizer, self).__init__(dejavu)

    def recognize_file(self, filename, fmt, ads_filter):
        """Decode one slice file and match it; the returned dict always
        carries at least 'confidence' (on non-match) and 'length'."""
        frames, self.Fs, file_hash, length = decoder.read(filename, self.dejavu.limit, fmt)

        t = time.time()
        match = self._recognize(ads_filter, *frames)
        t = time.time() - t

        if match:
            match['match_time'] = t
        else:
            match = {}
            match['confidence'] = 0
        
        match['length'] = length

        return match


    def recognize(self, seconds = 1, callback = None, ads_filter = None):
        """Loop forever: buffer roughly *seconds* of stdin audio into a temp
        file, recognize it, and report via *callback* or as JSON on stdout."""
        # Random temp file name, reused across iterations.
        rnd = struct.unpack("<L", os.urandom(4))[0]
        fname = "/tmp/{0}".format(rnd)
        # Reopen stdout unbuffered (third fdopen argument; Python 2 only).
        sys.stdout = os.fdopen(sys.stdout.fileno(), 'w', 0)
        while True:
            tmpfile = open(fname, "wb")
            t0 = time.time()
            ln = 0
            # NOTE(review): 1024*17 bytes/second assumes a ~136 kbps stream;
            # confirm against the actual stream bitrate.
            while ln < 1024*17*seconds:
                data = sys.stdin.read(1024)
                ln += len(data)
                tmpfile.write(data)
                tmpfile.flush()
            rec = self.recognize_file(fname, "mp3", ads_filter)
            rec["datetime"] = t0
            if callback is not None:
                callback(rec)
            else:
                print(json.dumps(rec))
            tmpfile.close()
+        
class MicrophoneRecognizer(BaseRecognizer):
    """Records audio from the default input device via PyAudio and
    recognizes the captured channels."""

    default_chunksize   = 8192
    default_format      = pyaudio.paInt16
    default_channels    = 2
    default_samplerate  = 44100

    def __init__(self, dejavu):
        super(MicrophoneRecognizer, self).__init__(dejavu)
        self.audio = pyaudio.PyAudio()
        self.stream = None
        self.data = []
        self.channels = MicrophoneRecognizer.default_channels
        self.chunksize = MicrophoneRecognizer.default_chunksize
        self.samplerate = MicrophoneRecognizer.default_samplerate
        self.recorded = False

    def start_recording(self, channels=default_channels,
                        samplerate=default_samplerate,
                        chunksize=default_chunksize):
        """Open (or reopen) the input stream and reset captured data."""
        self.chunksize = chunksize
        self.channels = channels
        self.recorded = False
        self.samplerate = samplerate

        if self.stream:
            self.stream.stop_stream()
            self.stream.close()

        self.stream = self.audio.open(
            format=self.default_format,
            channels=channels,
            rate=samplerate,
            input=True,
            frames_per_buffer=chunksize,
        )

        self.data = [[] for i in range(channels)]

    def process_recording(self):
        """Read one chunk from the stream and de-interleave per channel."""
        data = self.stream.read(self.chunksize)
        nums = np.fromstring(data, np.int16)
        for c in range(self.channels):
            self.data[c].extend(nums[c::self.channels])

    def stop_recording(self):
        """Close the stream and mark the recording as complete."""
        self.stream.stop_stream()
        self.stream.close()
        self.stream = None
        self.recorded = True

    def recognize_recording(self):
        """Match the recorded channels against the database."""
        if not self.recorded:
            raise NoRecordingError("Recording was not complete/begun")
        # Bug fix: _recognize signature is (ads_filter, *data); pass None so
        # the first channel is not misinterpreted as the ads filter.
        return self._recognize(None, *self.data)

    def get_recorded_time(self):
        """Return seconds of audio captured so far."""
        # Bug fix: the sample-rate attribute is `samplerate`; `self.rate`
        # never existed and raised AttributeError.
        return len(self.data[0]) / self.samplerate

    def recognize(self, seconds=10):
        """Record *seconds* of audio, then return the recognition result."""
        self.start_recording()
        for i in range(0, int(self.samplerate / self.chunksize
                              * seconds)):
            self.process_recording()
        self.stop_recording()
        return self.recognize_recording()
+
+
class NoRecordingError(Exception):
    """Raised when recognize_recording() is called before a recording has
    been completed."""
    pass

+ 276 - 0
ondemand/dejavu/testing.py

@@ -0,0 +1,276 @@
+from __future__ import division
+from pydub import AudioSegment
+from dejavu.decoder import path_to_songname
+from dejavu import Dejavu
+from dejavu.fingerprint import *
+import traceback
+import fnmatch
+import os, re, ast
+import subprocess
+import random
+import logging
+
def set_seed(seed=None):
    """Seed the module-wide random generator for reproducible sampling.

    `seed` as None means that the sampling will be random.
    Setting your own seed means that you can produce the
    same experiment over and over.
    """
    # `is not None` rather than `!= None`: identity is the correct
    # (PEP 8) comparison against the None singleton.
    if seed is not None:
        random.seed(seed)
+
def get_files_recursive(src, fmt):
    """Yield every file under `src` whose name ends with `fmt`.

    `src` is the source directory.
    `fmt` is the extension, ie ".mp3" or "mp3", etc.
    """
    pattern = '*' + fmt
    for dirpath, _dirnames, names in os.walk(src):
        for name in fnmatch.filter(names, pattern):
            yield os.path.join(dirpath, name)
+
def get_length_audio(audiopath, extension):
    """Return the length of the audio file in whole seconds.

    Returns None if the format isn't supported or decoding fails.
    """
    try:
        audio = AudioSegment.from_file(audiopath, extension.replace(".", ""))
    except Exception:
        # Narrowed from a bare `except:` (which would also swallow
        # KeyboardInterrupt/SystemExit).  Parenthesized print keeps the
        # same single-argument output on Python 2 and 3.
        print("Error in get_length_audio(): %s" % traceback.format_exc())
        return None
    # pydub reports length in milliseconds.
    return int(len(audio) / 1000.0)
+
def get_starttime(length, nseconds, padding):
    """Pick a random second at which a test sample may start.

    `length` is total audio length in seconds,
    `nseconds` is the amount of time to sample in seconds,
    `padding` is off-limits seconds at beginning and ending.
    """
    latest_start = length - nseconds - padding
    if latest_start < padding:
        # Track too short to honour the padding: sample from the start.
        return 0
    return random.randint(padding, latest_start)
+
def generate_test_files(src, dest, nseconds, fmts=[".mp3", ".wav"], padding=10):
    """Generate a test clip for each audio file found under `src`.

    For every file of the given formats, `nseconds` are sampled from a
    random position and written to `dest` as
    ``<name>_<starttime>_<nseconds>sec.<ext>`` via ffmpeg.

    `padding` is the number of off-limit seconds at the beginning and
    end of a track that won't be sampled, to avoid silence etc.
    """
    # Create directories if necessary.  (Replaces the os.stat + bare
    # `except:` probe, which also hid permission errors.)
    for directory in (src, dest):
        if not os.path.isdir(directory):
            os.mkdir(directory)

    # Find files recursively of each requested format.
    for fmt in fmts:
        for audiosource in get_files_recursive(src, fmt):
            # Parenthesized, single-argument prints work identically on
            # Python 2 and 3.
            print("audiosource: %s" % audiosource)

            filename, extension = os.path.splitext(os.path.basename(audiosource))
            length = get_length_audio(audiosource, extension)
            starttime = get_starttime(length, nseconds, padding)

            test_file_name = "%s_%s_%ssec.%s" % (
                os.path.join(dest, filename), starttime,
                nseconds, extension.replace(".", ""))

            # ffmpeg: seek to `starttime`, cut `nseconds`, write the clip.
            subprocess.check_output([
                "ffmpeg", "-y",
                "-ss", "%d" % starttime,
                "-t", "%d" % nseconds,
                "-i", audiosource,
                test_file_name])
+
def log_msg(msg, log=True, silent=False):
    """Emit `msg` to the debug log and/or the console.

    `log` controls the logging.debug call; `silent` suppresses the
    console echo.
    """
    if log:
        logging.debug(msg)
    if not silent:
        # Parenthesized so the statement behaves the same on Python 2 and 3.
        print(msg)
+
def autolabel(rects, ax):
    """Attach the integer height of each bar as a text label above it."""
    for bar in rects:
        h = bar.get_height()
        center = bar.get_x() + bar.get_width() / 2.
        ax.text(center, 1.05 * h, '%d' % int(h), ha='center', va='bottom')
+
def autolabeldoubles(rects, ax):
    """Attach each bar's height, rounded to 3 decimals, as a label above it."""
    for bar in rects:
        h = bar.get_height()
        center = bar.get_x() + bar.get_width() / 2.
        ax.text(center, 1.05 * h,
                '%s' % round(float(h), 3), ha='center', va='bottom')
+
class DejavuTest(object):
    """Accuracy/benchmark harness for Dejavu fingerprint recognition.

    Expects `folder` to contain clips named
    ``<song>_<starttime>_<n>sec.<ext>`` (as produced by
    generate_test_files) and `seconds` to be the collection of "<n>sec"
    labels to evaluate.  Each clip is recognized by shelling out to
    ``dejavu.py`` and the outcomes are tabulated in songs-by-durations
    matrices (match result, timing accuracy, query duration, confidence).

    NOTE(review): this module is Python 2 only (`xrange`, print
    statements).
    """

    def __init__(self, folder, seconds):
        super(DejavuTest, self).__init__()

        self.test_folder = folder
        self.test_seconds = seconds
        # Song names are discovered lazily by get_line_id().
        self.test_songs = []

        print "test_seconds", self.test_seconds

        # Keep only files whose "<n>sec" tag is one of the requested
        # durations.
        self.test_files = [
            f for f in os.listdir(self.test_folder) 
            if os.path.isfile(os.path.join(self.test_folder, f)) 
            and re.findall("[0-9]*sec", f)[0] in self.test_seconds]

        print "test_files", self.test_files

        # Matrix layout: one column per tested duration, one line per song.
        self.n_columns = len(self.test_seconds)
        self.n_lines = int(len(self.test_files) / self.n_columns)

        print "columns:", self.n_columns
        print "length of test files:", len(self.test_files)
        print "lines:", self.n_lines

        # variable match results (yes, no, invalid)
        self.result_match = [[0 for x in xrange(self.n_columns)] for x in xrange(self.n_lines)] 

        print "result_match matrix:", self.result_match 

        # variable match precision (if matched in the corrected time)
        self.result_matching_times = [[0 for x in xrange(self.n_columns)] for x in xrange(self.n_lines)] 

        # variable matching time (query time)
        self.result_query_duration = [[0 for x in xrange(self.n_columns)] for x in xrange(self.n_lines)] 

        # variable confidence
        self.result_match_confidence = [[0 for x in xrange(self.n_columns)] for x in xrange(self.n_lines)] 

        # Run the whole evaluation immediately on construction.
        self.begin()

    def get_column_id (self, secs):
        """Map a "<n>sec" label to its column index (None if unknown)."""
        for i, sec in enumerate(self.test_seconds):
            if secs == sec:
                return i

    def get_line_id (self, song):
        """Map a song name to its line index, registering unseen songs."""
        for i, s in enumerate(self.test_songs):
            if song == s:
                return i
        self.test_songs.append(song)
        return len(self.test_songs) - 1

    def create_plots(self, name, results, results_folder):
        """Save one bar chart per tested duration under `results_folder`.

        `name` labels the metric (e.g. 'Confidence'); `results` is the
        corresponding matrix.

        NOTE(review): `np` and `plt` are not in this module's visible
        import block — presumably numpy arrives via
        ``from dejavu.fingerprint import *`` and matplotlib via a
        missing import; confirm before relying on this method.
        """
        for sec in range(0, len(self.test_seconds)):
            ind = np.arange(self.n_lines) #
            width = 0.25       # the width of the bars

            fig = plt.figure()
            ax = fig.add_subplot(111)
            ax.set_xlim([-1 * width, 2 * width])

            means_dvj = [x[0] for x in results[sec]]
            rects1 = ax.bar(ind, means_dvj, width, color='r')

            # add some
            ax.set_ylabel(name)
            ax.set_title("%s %s Results" % (self.test_seconds[sec], name)) 
            ax.set_xticks(ind + width)

            labels = [0 for x in range(0, self.n_lines)]
            for x in range(0, self.n_lines):
                labels[x] = "song %s" % (x+1)
            ax.set_xticklabels(labels)

            box = ax.get_position()
            ax.set_position([box.x0, box.y0, box.width * 0.75, box.height])

            #ax.legend( (rects1[0]), ('Dejavu'), loc='center left', bbox_to_anchor=(1, 0.5))

            # Confidence is an integer metric; everything else is a float.
            if name == 'Confidence':
                autolabel(rects1, ax)
            else:
                autolabeldoubles(rects1, ax)

            plt.grid()

            fig_name = os.path.join(results_folder, "%s_%s.png" % (name, self.test_seconds[sec]))
            fig.savefig(fig_name)

    def begin(self):
        """Recognize every test file and fill the result matrices.

        Shells out to ``dejavu.py -r file <clip>`` per clip and parses
        its printed dict (after normalizing quotes) with
        ast.literal_eval.
        """
        for f in self.test_files:
            log_msg('--------------------------------------------------')
            log_msg('file: %s' % f)

            # get column 
            col = self.get_column_id(re.findall("[0-9]*sec", f)[0])
            # format: XXXX_offset_length.mp3
            song = path_to_songname(f).split("_")[0]  
            line = self.get_line_id(song)
            result = subprocess.check_output([
                "python", 
                "dejavu.py",
                '-r',
                'file', 
                self.test_folder + "/" + f])

            if result.strip() == "None":
                log_msg('No match')
                self.result_match[line][col] = 'no'
                self.result_matching_times[line][col] = 0
                self.result_query_duration[line][col] = 0
                self.result_match_confidence[line][col] = 0
            
            else:
                # Normalize single quotes so the printed dict parses.
                result = result.strip()
                result = result.replace(" \'", ' "')
                result = result.replace("{\'", '{"')
                result = result.replace("\':", '":')
                result = result.replace("\',", '",')

                # which song did we predict?
                result = ast.literal_eval(result)
                song_result = result["song_name"]
                log_msg('song: %s' % song)
                log_msg('song_result: %s' % song_result)

                if song_result != song:
                    log_msg('invalid match')
                    self.result_match[line][col] = 'invalid'
                    self.result_matching_times[line][col] = 0
                    self.result_query_duration[line][col] = 0
                    self.result_match_confidence[line][col] = 0
                else:
                    log_msg('correct match')
                    print self.result_match
                    self.result_match[line][col] = 'yes'
                    self.result_query_duration[line][col] = round(result[Dejavu.MATCH_TIME],3)
                    self.result_match_confidence[line][col] = result[Dejavu.CONFIDENCE]

                    # The true offset is encoded in the file name:
                    # <song>_<starttime>_<n>sec.<ext>
                    song_start_time = re.findall("\_[^\_]+",f)
                    song_start_time = song_start_time[0].lstrip("_ ")

                    # Convert the fingerprint frame offset back to seconds.
                    result_start_time = round((result[Dejavu.OFFSET] * DEFAULT_WINDOW_SIZE * 
                        DEFAULT_OVERLAP_RATIO) / (DEFAULT_FS), 0)

                    self.result_matching_times[line][col] = int(result_start_time) - int(song_start_time)
                    # A one-second discrepancy counts as exact.
                    if (abs(self.result_matching_times[line][col]) == 1):
                        self.result_matching_times[line][col] = 0

                    log_msg('query duration: %s' % round(result[Dejavu.MATCH_TIME],3))
                    log_msg('confidence: %s' % result[Dejavu.CONFIDENCE])
                    log_msg('song start_time: %s' % song_start_time)
                    log_msg('result start time: %s' % result_start_time)
                    if (self.result_matching_times[line][col] == 0):
                        log_msg('accurate match')
                    else:
                        log_msg('inaccurate match')
            log_msg('--------------------------------------------------\n')
+
+
+
+

+ 121 - 0
ondemand/dejavu/wavio.py

@@ -0,0 +1,121 @@
+# wavio.py
+# Author: Warren Weckesser
+# License: BSD 3-Clause (http://opensource.org/licenses/BSD-3-Clause)
+# Synopsis: A Python module for reading and writing 24 bit WAV files.
+# Github: github.com/WarrenWeckesser/wavio
+
+import wave as _wave
+import numpy as _np
+
+
+def _wav2array(nchannels, sampwidth, data):
+    """data must be the string containing the bytes from the wav file."""
+    num_samples, remainder = divmod(len(data), sampwidth * nchannels)
+    if remainder > 0:
+        raise ValueError('The length of data is not a multiple of '
+                         'sampwidth * num_channels.')
+    if sampwidth > 4:
+        raise ValueError("sampwidth must not be greater than 4.")
+
+    if sampwidth == 3:
+        a = _np.empty((num_samples, nchannels, 4), dtype=_np.uint8)
+        raw_bytes = _np.fromstring(data, dtype=_np.uint8)
+        a[:, :, :sampwidth] = raw_bytes.reshape(-1, nchannels, sampwidth)
+        a[:, :, sampwidth:] = (a[:, :, sampwidth - 1:sampwidth] >> 7) * 255
+        result = a.view('<i4').reshape(a.shape[:-1])
+    else:
+        # 8 bit samples are stored as unsigned ints; others as signed ints.
+        dt_char = 'u' if sampwidth == 1 else 'i'
+        a = _np.fromstring(data, dtype='<%s%d' % (dt_char, sampwidth))
+        result = a.reshape(-1, nchannels)
+    return result
+
+
def readwav(file):
    """
    Read a WAV file.

    Parameters
    ----------
    file : string or file object
        Either the name of a file or an open file pointer.

    Return Values
    -------------
    rate : float
        The sampling frequency (i.e. frame rate)
    sampwidth : float
        The sample width, in bytes.  E.g. for a 24 bit WAV file,
        sampwidth is 3.
    data : numpy array
        The array containing the data.  The shape of the array is
        (num_samples, num_channels).  num_channels is the number of
        audio channels (1 for mono, 2 for stereo).

    Notes
    -----
    This function uses the `wave` module of the Python standard libary
    to read the WAV file, so it has the same limitations as that library.
    In particular, the function does not read compressed WAV files.

    """
    wav = _wave.open(file)
    try:
        # Fix: close the handle even if a getter or readframes raises,
        # so the file descriptor is not leaked.
        rate = wav.getframerate()
        nchannels = wav.getnchannels()
        sampwidth = wav.getsampwidth()
        nframes = wav.getnframes()
        data = wav.readframes(nframes)
    finally:
        wav.close()
    array = _wav2array(nchannels, sampwidth, data)
    return rate, sampwidth, array
+
+
def writewav24(filename, rate, data):
    """
    Create a 24 bit wav file.

    Parameters
    ----------
    filename : string
        Name of the file to create.
    rate : float
        The sampling frequency (i.e. frame rate) of the data.
    data : array-like collection of integer or floating point values
        data must be "array-like", either 1- or 2-dimensional.  If it
        is 2-d, the rows are the frames (i.e. samples) and the columns
        are the channels.

    Notes
    -----
    The data is assumed to be signed, and the values are assumed to be
    within the range of a 24 bit integer.  Floating point values are
    converted to integers.  The data is not rescaled or normalized before
    writing it to the file.

    Example
    -------
    Create a 3 second 440 Hz sine wave.

    >>> rate = 22050  # samples per second
    >>> T = 3         # sample duration (seconds)
    >>> f = 440.0     # sound frequency (Hz)
    >>> t = np.linspace(0, T, T*rate, endpoint=False)
    >>> x = (2**23 - 1) * np.sin(2 * np.pi * f * t)
    >>> writewav24("sine24.wav", rate, x)

    """
    a32 = _np.asarray(data, dtype=_np.int32)
    if a32.ndim == 1:
        # Convert to a 2D array with a single column (mono).
        a32.shape = a32.shape + (1,)
    # By shifting first 0 bits, then 8, then 16, the resulting output
    # is 24 bit little-endian.
    a8 = (a32.reshape(a32.shape + (1,)) >> _np.array([0, 8, 16])) & 255
    # Fix: ndarray.tostring() is deprecated in NumPy; tobytes() is the
    # identical replacement.
    wavdata = a8.astype(_np.uint8).tobytes()

    w = _wave.open(filename, 'wb')
    try:
        w.setnchannels(a32.shape[1])
        w.setsampwidth(3)
        w.setframerate(rate)
        w.writeframes(wavdata)
    finally:
        # Close even if writing fails so the handle is not leaked.
        w.close()

+ 1 - 2
ondemand/endpoint.py

@@ -1,6 +1,5 @@
 from __future__ import absolute_import
 from tornado.web import RequestHandler, Application
-import ondemand
 import psutil
 import os
 
@@ -11,7 +10,7 @@ class InformerHandler(RequestHandler):
         process = psutil.Process()
         queue = settings['queue']
         return self.write(dict(
-            version=ondemand.__version__,
+            version=__version__,
             processID=os.getpid(),
             memoryUsage=process.memory_info().rss,
             queueSize=queue.qsize(),

+ 17 - 17
ondemand/service.py

@@ -2,13 +2,13 @@
 from __future__ import print_function, absolute_import
 from tornado.ioloop import IOLoop
 from tornado.web import Application
-from fourier.api.client import Client, ConnectionError
-from fourier.boxconfig import parse_config
-from fourier.dejavu.recognize import FilePerSecondRecognizer
+from client import Client, ConnectionError
+from boxconfig import parse_config
+from dejavu.recognize import FilePerSecondRecognizer
 from datetime import datetime, timedelta
-from ondemand.endpoint import setup_endpoint
-from ondemand.calibration import Calibrations
-from fourier.dejavu import Dejavu, CouldntDecodeError
+from endpoint import setup_endpoint
+from calibration import Calibrations
+from dejavu import Dejavu, CouldntDecodeError
 from firebase_admin import credentials
 from firebase_admin import db as fbdb
 from binascii import hexlify
@@ -20,7 +20,6 @@ from subprocess import Popen, PIPE
 import logging as log
 import firebase_admin
 import mutagen.mp3
-import OpenSSL.SSL
 import subprocess
 import requests
 import dateutil
@@ -69,7 +68,7 @@ client = Client(config['device_id'],
 cloud_base_url = 'https://storage.googleapis.com/{}' \
     .format(config['bucket'])
 base_path = config.get("basepath", "/var/fourier")
-fb_credentials = credentials.Certificate('/etc/Fourier-key.json')
+fb_credentials = credentials.Certificate('/code/Fourier-key.json')
 firebase_admin.initialize_app(fb_credentials, config['firebase'])
 device_id = config['device_id']
 device_path = os.path.join(base_path, device_id)
@@ -184,7 +183,7 @@ def process_queue_with_threads():
 
 def process_segment(item, audios=None, calibration=None):
     """ Procesa una hora de audio """
-
+    print(item)
     station = item['estacion']
 
     if not calibration:
@@ -535,20 +534,21 @@ def download_file(file_path=None):
 
 def get_pendings():
     url = 'https://api.fourier.audio/v1/calendario/pendiente?id={}'.format(config['device_id'], )
-    headers = {
-        'Authorization': 'Bearer {}'.format(config['apiSecret'], )
-    }
-    response = requests.get(url, headers=headers)
+    #headers = {
+    #    'Authorization': 'Bearer {}'.format(config['apiSecret'], )
+    #}
+    response = requests.get(url)
+    log.info(response.json())
     return response.json()
 
 
 def send_results(item):
     url = 'https://api.fourier.audio/v1/calendario/resultado'
-    headers = {
-        'Authorization': 'Bearer {}'.format(config['apiSecret'], )
-    }
+    #headers = {
+    #    'Authorization': 'Bearer {}'.format(config['apiSecret'], )
+    #}
     log.info('url: {}'.format(url))
-    response = requests.post(url, json=item, headers=headers)
+    response = requests.post(url, json=item)
     return response
 
 

+ 269 - 0
ondemand/service1.py

@@ -0,0 +1,269 @@
+# -*- coding: utf8 -*-
+from __future__ import print_function, absolute_import
+from tornado.ioloop import IOLoop
+from tornado.web import Application
+from client import Client, ConnectionError
+from boxconfig import parse_config
+from dejavu.recognize import FilePerSecondRecognizer
+from datetime import datetime, timedelta
+from endpoint import setup_endpoint
+from calibration import Calibrations
+from dejavu import Dejavu, CouldntDecodeError
+from firebase_admin import credentials
+from firebase_admin import db as fbdb
+from multiprocessing import Process
+from subprocess import Popen, PIPE
+import logging as log
+import firebase_admin
+import mutagen.mp3
+import math
+import sys
+import os
+
+if sys.version_info >= (3, 0):
+    from queue import Queue, Empty
+else:
+    from Queue import Queue, Empty
+
# Log format: timestamp + module + message, at INFO level.
log.basicConfig(format='[%(asctime)s] [%(module)s] %(message)s', level=log.INFO)

# Tuning constants for segment processing.
AUDIOS_PATH = '/tmp'
AHEAD_TIME_AUDIO_TOLERANCE = 2  # second
MAX_SEGMENT_THREADS = 4
THRESHOLD = 10
SEGMENTS_TOLERANCE_RATE = 0.6
FALL_TOLERANCE_SEGMENTS = 1

# THRESHOLD selection modes (see find_repetitions).
THRESHOLD_FIXED = 1
THRESHOLD_AVERAGE = 2

# Queue processing modes:
#  - QUEUE_SINGLE: process a single segment at a time
#  - QUEUE_THREAD: start a thread per segment
# Threaded is used by default.
# TODO: make this configurable via command-line arguments.
QUEUE_SINGLE = 1
QUEUE_THREAD = 2

# Either the threading or the multiprocessing API can be used;
# multiprocessing's Process is the current choice.
MultiAPI = Process

# Global service state: configuration, work queue and API client.
config = parse_config()
queue = Queue()
client = Client(config['device_id'],
                config['apiSecret'])
# Public base URL of the cloud-storage bucket that holds the audio.
cloud_base_url = 'https://storage.googleapis.com/{}' \
    .format(config['bucket'])
base_path = config.get("basepath", "/var/fourier")
# NOTE(review): the Firebase service-account key is read from /code,
# i.e. the bind-mounted source tree (see run.sh) — confirm this is the
# intended location for production deployments.
fb_credentials = credentials.Certificate('/code/Fourier-key.json')
firebase_admin.initialize_app(fb_credentials, config['firebase'])
device_id = config['device_id']
device_path = os.path.join(base_path, device_id)
recognizer = FilePerSecondRecognizer
device_ref = fbdb.reference('devices').child(config['device_id'])
calibrations = Calibrations(config['device_id'], client=client)

# settings
queue_mode = QUEUE_SINGLE
threshold_mode = THRESHOLD_FIXED

# Local file-index database path (SQLite connection currently disabled).
db_path = config.get('localDatabase', os.path.join(device_path, 'files.db'))
#db = sqlite3.connect(db_path)
cloud_cache = {}
+
def process_segment(audios=None, calibration=None):
    """Process one hour of audio against a hard-coded reference ad.

    Debug/bootstrap variant of the production process_segment: it
    fingerprints a fixed mp3 into an in-memory Dejavu database, would
    scan `audios_iterable` (currently empty) for matches, and groups
    detected repetitions per matched name via find_repetitions().
    """
    segment_size = 5
    audio_length = 0

    # 1.1 Work out how many matching segments are required from the
    # total duration of the reference audio.
    try:
        filename = "/tmp/anuncios/-MOiAvmUkZLmXrAWCy9u.mp3"
        audio = mutagen.mp3.MP3(filename)
        audio_length = audio.info.length

        # segment_size may alternatively be the sentinel strings
        # 'integer' or 'ceil' (kept from the production variant).
        if segment_size == 'integer':
            segment_size = int(audio_length)
        elif segment_size == 'ceil':
            segment_size = int(math.ceil(audio_length / 5)) * 5

        segments_needed = int(round(float(audio_length) / float(segment_size)))
        # Require only 80% of the theoretical segment count.
        segments_needed = int(round(segments_needed * 0.8))

    except Exception as ex:
        # Most likely the file is missing or is not a valid mp3.
        log.error(str(ex))
        return

    # Fingerprint the reference file into an in-memory database.
    dejavu = Dejavu({"database_type": "mem"})
    try:
        dejavu.fingerprint_file(filename)
    except Exception as ex:
        log.error('[process_segment] cannot fingerprint: {}'.format(ex))

    # 2. Read the list of files from the local database.
    # NOTE(review): audios_iterable is left empty here, so the matching
    # loop below is currently a no-op placeholder.
    audios_counter = 0
    results = []
    v = []
    audios_iterable = []

    for path, name, ts in audios_iterable:
        audios_counter += os.path.isfile(path)
        values = []

        try:
            for match in dejavu.recognize(recognizer, path, segment_size):
                name = None
                ad = None

                results.append({
                    'ad': ad,
                    'confidence': match['confidence'],
                    'timestamp': ts,
                    'offset': match['offset'],
                    'name': name
                })
                values.append(str(match['confidence']))

                # Advance the timestamp by the matched chunk's length.
                ts += match['length'] / 1000

            v.append(','.join(values))
            log.info('[process_segment] {0}) {1}'.format(
                os.path.split(path)[-1],
                ','.join(values),
            ))

        except CouldntDecodeError as ex:
            log.error('[process_segment] {}'.format(ex))

    try:
        # Fix: `item_ids` was previously undefined here (NameError as
        # soon as results existed).  Group by the matched name recorded
        # in each result row instead.
        item_ids = set(result["name"] for result in results)
        encontrados = {}
        for i in item_ids:
            r = [result for result in results if result["name"] == i]
            encontrados[i] = find_repetitions(r, segments_needed=segments_needed, calibration=calibration,)
    except ConnectionError as ex:
        log.error('[process_segment] {}'.format(str(ex)))
    except UserWarning as warn:
        log.warning(str(warn))
+
+
def find_repetitions(results, segments_needed=2, calibration=None):
    """Scan per-segment confidence `results` for runs of matches.

    A repetition is declared once `segments_needed` consecutive results
    reach the confidence threshold, tolerating a single below-threshold
    dip when the calibration enables fall tolerance.  Returns the list
    of result rows at which each detected run started.

    The threshold is either taken from the calibration (fixed mode) or
    computed as the ceiling of the average confidence (average mode),
    depending on the module-level `threshold_mode`.
    """
    found_counter = 0
    found_down_counter = 0
    found_index = None
    expect_space = False
    expect_recover = False
    last_value_in_threshold_index = -1
    fall_tolerance = calibration['fallTolerance']
    found = []

    high = 100 # Get this value from a parameter
    middle_high = 50 # Get this value from a parameter
    segment_middle_needed = 2 # Get this value from a parameter
    found_high = None
    found_middle_high = []

    # Pick the threshold: fixed from calibration, or the ceiling of the
    # average confidence across all results.
    if threshold_mode == THRESHOLD_FIXED:
        threshold = calibration['threshold']
    elif threshold_mode == THRESHOLD_AVERAGE:
        values = [x['confidence'] for x in results]
        threshold = math.ceil(float(sum(values)) / float(len(values)))

    if segments_needed < 1:
        segments_needed = 1

    for index, result in enumerate(results):
        #if result['confidence'] >= high:
        #    if found_high is None:
        #        found_high = index
        #    elif result['confidence'] > results[found_high]['confidence']:
        #        found_high = index
        #elif result['confidence'] >= middle_high:
        #    found_middle_high.append(index)

        if not expect_space:
            if result['confidence'] >= threshold:
                # In-threshold value: extend the current run.
                found_counter += 1
                last_value_in_threshold_index = index
                if found_index is None:
                    found_index = index
                if expect_recover:
                    # The dip recovered: count the tolerated low values too.
                    found_counter += found_down_counter
                    expect_recover = False

            elif fall_tolerance:
                if not expect_recover:
                    if last_value_in_threshold_index != -1:
                        """ Solo cuando ya haya entrado por lo menos
                        un valor en el rango del threshold, es cuando
                        se podrá esperar un valor bajo """
                        # (Translation: only once at least one value has
                        # entered the threshold range can a low value be
                        # tolerated.)
                        expect_recover = True
                        found_down_counter += 1
                    else:
                        pass
                else:
                    """ Si después de haber pasado tolerado 1 elemento
                    vuelve a salir otro fuera del threshold continuo,
                    entonces ya se da por perdido """
                    # (Translation: a second consecutive below-threshold
                    # value after the tolerated one means the run is lost.)
                    found_counter = 0
                    found_down_counter = 0
                    found_index = None
                    expect_recover = False

            else:
                # No fall tolerance: any below-threshold value resets the run.
                found_counter = 0
                found_down_counter = 0
                found_index = None
                expect_recover = False
                # Here we would check for a single high value
                #if found_high is not None:
                #    found_row = results[found_high]
                #    found.append(found_row)
                #elif len(found_middle_high) >= segment_middle_needed:
                #    found_row = results[found_middle_high[0]]
                #    found.append(found_row)
                #found_high = None
                #found_middle_high = []

        else:
            # After a detection, wait until confidence drops back below
            # the threshold before looking for the next run.
            if result['confidence'] <= threshold:
                expect_space = False

        if found_counter >= segments_needed:
            # Enough consecutive segments: record the run's first row.
            found_row = results[found_index]
            found.append(found_row)
            found_counter = 0
            expect_space = True
            #found_high = None
            #found_middle_high = []

    return found
+
# Wire up the HTTP status endpoint and schedule one run of the debug
# process_segment on the Tornado event loop.
app = setup_endpoint(queue=queue)
loop = IOLoop.current()
loop.add_callback(process_segment)

if __name__ == '__main__':
    try:
        log.info('Starting ondemand service')
        loop.start()
    except KeyboardInterrupt:
        log.error('Process killed')

+ 5 - 0
run.sh

@@ -0,0 +1,5 @@
#!/bin/sh
### testing: interactive shell inside the container
#sudo docker run -it -v ~/Desarrollo/fourier-ondemand/:/code -v /var/fourier:/var/fourier ondemand bash
### production: run detached, with the source tree and data dir bind-mounted
sudo docker run -d -v ~/Desarrollo/fourier-ondemand/:/code -v /var/fourier:/var/fourier ondemand