123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188 |
- from __future__ import absolute_import
- import dejavu.fingerprint as fingerprint
- import dejavu.decoder as decoder
- import numpy as np
- import pyaudio
- import time
- import sys
- import os
- import struct
- import json
- if sys.version_info > (3, 0):
- from itertools import zip_longest as izip_longest
- else:
- from itertools import izip_longest
class BaseRecognizer(object):
    """Common plumbing shared by all recognizers.

    Subclasses acquire per-channel audio samples from some source and
    feed them through ``_recognize`` to the owning Dejavu instance.
    """

    def __init__(self, dejavu):
        self.dejavu = dejavu
        self.Fs = fingerprint.DEFAULT_FS

    def _recognize(self, ads_filter, *data):
        """Match every channel in *data* and align the combined results."""
        found = []
        for channel in data:
            found.extend(self.dejavu.find_matches(channel, Fs=self.Fs,
                                                  ads_filter=ads_filter))
        return self.dejavu.align_matches(found)

    def recognize(self):
        pass  # subclasses provide the actual entry point
class FileRecognizer(BaseRecognizer):
    """Recognize audio stored in a file on disk."""

    def __init__(self, dejavu):
        super(FileRecognizer, self).__init__(dejavu)

    def recognize_file(self, filename, ads_filter=None):
        """Fingerprint *filename* and return the best match, or None.

        A successful match dict is annotated with 'match_time' (seconds
        spent matching) and 'length' (decoded audio length).
        *ads_filter* is forwarded to the matcher; default None keeps the
        old behavior for existing callers.
        """
        frames, self.Fs, file_hash, length = decoder.read(filename,
                                                          self.dejavu.limit)
        t = time.time()
        # BUG FIX: _recognize expects ads_filter as its first positional
        # argument; the old call passed the first audio channel there,
        # silently excluding that channel from matching.
        match = self._recognize(ads_filter, *frames)
        t = time.time() - t
        if match:
            match['match_time'] = t
            match['length'] = length
        return match

    def recognize(self, filename):
        return self.recognize_file(filename)
class FilePerSecondRecognizer(BaseRecognizer):
    """Recognize an audio file incrementally, yielding one result per chunk."""

    def __init__(self, dejavu):
        super(FilePerSecondRecognizer, self).__init__(dejavu)

    def recognize(self, file_path, seconds=1, callback=None, ads_filter=None):
        """Yield a match dict for each *seconds*-long chunk of *file_path*.

        Unmatched chunks yield ``{'confidence': 0, 'offset': 0}``.  Every
        yielded dict carries 'length'; matched dicts also carry
        'match_time' (seconds spent matching that chunk).  Iteration stops
        at the first zero-length chunk.
        """
        for frames, _, _, length in decoder.read_chunks(file_path,
                                                        chunk_size=seconds):
            if length == 0:
                break

            t0 = time.time()
            match = self._recognize(ads_filter, *frames)
            t0 = time.time() - t0
            if match:
                # BUG FIX: previously stored the match dict itself under
                # 'match_time' instead of the elapsed time t0.
                match["match_time"] = t0
            else:
                match = {
                    'confidence': 0,
                    'offset': 0
                }
            match['length'] = length
            yield match
-
class StdinRecognizer(BaseRecognizer):
    """Recognize MP3 audio streamed on standard input, chunk by chunk."""

    def __init__(self, dejavu):
        super(StdinRecognizer, self).__init__(dejavu)

    def recognize_file(self, filename, fmt, ads_filter):
        """Decode *filename* (format *fmt*) and return its match dict.

        Always returns a dict: unmatched audio yields
        ``{'confidence': 0, 'length': ...}``; matched audio additionally
        carries 'match_time'.
        """
        frames, self.Fs, file_hash, length = decoder.read(filename,
                                                          self.dejavu.limit,
                                                          fmt)
        t = time.time()
        match = self._recognize(ads_filter, *frames)
        t = time.time() - t
        if match:
            match['match_time'] = t
        else:
            match = {'confidence': 0}

        match['length'] = length
        return match

    def recognize(self, seconds=1, callback=None, ads_filter=None):
        """Read stdin until EOF, matching roughly *seconds* of MP3 at a time.

        Each chunk is spooled to a temp file, recognized, then either
        handed to *callback* or printed as a JSON line.  BUG FIX: on EOF
        the old loop spun forever (read() keeps returning empty data);
        now it returns.
        """
        rnd = struct.unpack("<L", os.urandom(4))[0]
        fname = "/tmp/{0}".format(rnd)
        # BUG FIX: stdin delivers raw bytes via .buffer on Python 3; the
        # text stream cannot be written to a file opened "wb".
        stdin = getattr(sys.stdin, 'buffer', sys.stdin)
        try:
            # Unbuffered stdout so consumers see results immediately
            # (Python 2 only; Python 3 forbids unbuffered text mode).
            sys.stdout = os.fdopen(sys.stdout.fileno(), 'w', 0)
        except ValueError:
            pass
        while True:
            with open(fname, "wb") as tmpfile:
                t0 = time.time()
                ln = 0
                # ~17 KiB per second approximates one second of MP3 here.
                while ln < 1024 * 17 * seconds:
                    data = stdin.read(1024)
                    if not data:
                        return  # EOF: stop instead of busy-looping
                    ln += len(data)
                    tmpfile.write(data)
                tmpfile.flush()
            rec = self.recognize_file(fname, "mp3", ads_filter)
            rec["datetime"] = t0
            if callback is not None:
                callback(rec)
            else:
                print(json.dumps(rec))
                sys.stdout.flush()
-
class MicrophoneRecognizer(BaseRecognizer):
    """Recognize audio captured live from the default input device."""

    default_chunksize = 8192
    default_format = pyaudio.paInt16
    default_channels = 2
    default_samplerate = 44100

    def __init__(self, dejavu):
        super(MicrophoneRecognizer, self).__init__(dejavu)
        self.audio = pyaudio.PyAudio()
        self.stream = None
        self.data = []  # one sample list per channel
        self.channels = MicrophoneRecognizer.default_channels
        self.chunksize = MicrophoneRecognizer.default_chunksize
        self.samplerate = MicrophoneRecognizer.default_samplerate
        self.recorded = False

    def start_recording(self, channels=default_channels,
                        samplerate=default_samplerate,
                        chunksize=default_chunksize):
        """Open (or reopen) the input stream and reset captured data."""
        self.chunksize = chunksize
        self.channels = channels
        self.recorded = False
        self.samplerate = samplerate
        if self.stream:
            self.stream.stop_stream()
            self.stream.close()
        self.stream = self.audio.open(
            format=self.default_format,
            channels=channels,
            rate=samplerate,
            input=True,
            frames_per_buffer=chunksize,
        )
        self.data = [[] for i in range(channels)]

    def process_recording(self):
        """Pull one chunk from the stream and de-interleave per channel."""
        data = self.stream.read(self.chunksize)
        # BUG FIX: np.fromstring is deprecated (removed in modern NumPy);
        # frombuffer is the equivalent for raw int16 PCM bytes.
        nums = np.frombuffer(data, np.int16)
        for c in range(self.channels):
            self.data[c].extend(nums[c::self.channels])

    def stop_recording(self):
        self.stream.stop_stream()
        self.stream.close()
        self.stream = None
        self.recorded = True

    def recognize_recording(self):
        """Match the buffered recording; requires stop_recording() first.

        Raises NoRecordingError when no complete recording is buffered.
        """
        if not self.recorded:
            raise NoRecordingError("Recording was not complete/begun")
        # BUG FIX: _recognize takes ads_filter as its first positional
        # argument; the old call fed channel 0 in as the filter and
        # dropped it from matching.
        return self._recognize(None, *self.data)

    def get_recorded_time(self):
        """Return the buffered duration in seconds."""
        # BUG FIX: the attribute is self.samplerate; self.rate never existed,
        # so this method always raised AttributeError.
        return len(self.data[0]) / self.samplerate

    def recognize(self, seconds=10):
        """Record *seconds* of audio and return the recognition result."""
        self.start_recording()
        for i in range(0, int(self.samplerate / self.chunksize * seconds)):
            self.process_recording()
        self.stop_recording()
        return self.recognize_recording()
class NoRecordingError(Exception):
    """Raised when recognition is requested before a recording completed."""
|