recognize.py 5.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188
  1. from __future__ import absolute_import
  2. import dejavu.fingerprint as fingerprint
  3. import dejavu.decoder as decoder
  4. import numpy as np
  5. import pyaudio
  6. import time
  7. import sys
  8. import os
  9. import struct
  10. import json
  11. if sys.version_info > (3, 0):
  12. from itertools import zip_longest as izip_longest
  13. else:
  14. from itertools import izip_longest
  15. class BaseRecognizer(object):
  16. def __init__(self, dejavu):
  17. self.dejavu = dejavu
  18. self.Fs = fingerprint.DEFAULT_FS
  19. def _recognize(self, ads_filter, *data):
  20. matches = []
  21. for d in data:
  22. matches.extend(self.dejavu.find_matches(d, Fs=self.Fs, ads_filter=ads_filter))
  23. return self.dejavu.align_matches(matches)
  24. def recognize(self):
  25. pass # base class does nothing
  26. class FileRecognizer(BaseRecognizer):
  27. def __init__(self, dejavu):
  28. super(FileRecognizer, self).__init__(dejavu)
  29. def recognize_file(self, filename):
  30. frames, self.Fs, file_hash, length = decoder.read(filename, self.dejavu.limit)
  31. t = time.time()
  32. match = self._recognize(*frames)
  33. t = time.time() - t
  34. if match:
  35. match['match_time'] = t
  36. match['length'] = length
  37. return match
  38. def recognize(self, filename):
  39. return self.recognize_file(filename)
  40. class FilePerSecondRecognizer(BaseRecognizer):
  41. def __init__(self, dejavu):
  42. super(FilePerSecondRecognizer, self).__init__(dejavu)
  43. def recognize(self, file_path, seconds = 1, callback = None, ads_filter = None):
  44. for frames, _, _, length in decoder.read_chunks(file_path, chunk_size = seconds):
  45. if length == 0:
  46. break
  47. t0 = time.time()
  48. match = self._recognize(ads_filter, *frames)
  49. t0 = time.time() - t0
  50. if match:
  51. match["match_time"] = match
  52. else:
  53. match = {
  54. 'confidence': 0,
  55. 'offset': 0
  56. }
  57. match['length'] = length
  58. yield match
  59. class StdinRecognizer(BaseRecognizer):
  60. def __init__(self, dejavu):
  61. super(StdinRecognizer, self).__init__(dejavu)
  62. def recognize_file(self, filename, fmt, ads_filter):
  63. frames, self.Fs, file_hash, length = decoder.read(filename, self.dejavu.limit, fmt)
  64. t = time.time()
  65. match = self._recognize(ads_filter, *frames)
  66. t = time.time() - t
  67. if match:
  68. match['match_time'] = t
  69. else:
  70. match = {}
  71. match['confidence'] = 0
  72. match['length'] = length
  73. return match
  74. def recognize(self, seconds = 1, callback = None, ads_filter = None):
  75. rnd = struct.unpack("<L", os.urandom(4))[0]
  76. fname = "/tmp/{0}".format(rnd)
  77. sys.stdout = os.fdopen(sys.stdout.fileno(), 'w', 0)
  78. while True:
  79. tmpfile = open(fname, "wb")
  80. t0 = time.time()
  81. ln = 0
  82. while ln < 1024*17*seconds:
  83. data = sys.stdin.read(1024)
  84. ln += len(data)
  85. tmpfile.write(data)
  86. tmpfile.flush()
  87. rec = self.recognize_file(fname, "mp3", ads_filter)
  88. rec["datetime"] = t0
  89. if callback is not None:
  90. callback(rec)
  91. else:
  92. print(json.dumps(rec))
  93. tmpfile.close()
  94. class MicrophoneRecognizer(BaseRecognizer):
  95. default_chunksize = 8192
  96. default_format = pyaudio.paInt16
  97. default_channels = 2
  98. default_samplerate = 44100
  99. def __init__(self, dejavu):
  100. super(MicrophoneRecognizer, self).__init__(dejavu)
  101. self.audio = pyaudio.PyAudio()
  102. self.stream = None
  103. self.data = []
  104. self.channels = MicrophoneRecognizer.default_channels
  105. self.chunksize = MicrophoneRecognizer.default_chunksize
  106. self.samplerate = MicrophoneRecognizer.default_samplerate
  107. self.recorded = False
  108. def start_recording(self, channels=default_channels,
  109. samplerate=default_samplerate,
  110. chunksize=default_chunksize):
  111. self.chunksize = chunksize
  112. self.channels = channels
  113. self.recorded = False
  114. self.samplerate = samplerate
  115. if self.stream:
  116. self.stream.stop_stream()
  117. self.stream.close()
  118. self.stream = self.audio.open(
  119. format=self.default_format,
  120. channels=channels,
  121. rate=samplerate,
  122. input=True,
  123. frames_per_buffer=chunksize,
  124. )
  125. self.data = [[] for i in range(channels)]
  126. def process_recording(self):
  127. data = self.stream.read(self.chunksize)
  128. nums = np.fromstring(data, np.int16)
  129. for c in range(self.channels):
  130. self.data[c].extend(nums[c::self.channels])
  131. def stop_recording(self):
  132. self.stream.stop_stream()
  133. self.stream.close()
  134. self.stream = None
  135. self.recorded = True
  136. def recognize_recording(self):
  137. if not self.recorded:
  138. raise NoRecordingError("Recording was not complete/begun")
  139. return self._recognize(*self.data)
  140. def get_recorded_time(self):
  141. return len(self.data[0]) / self.rate
  142. def recognize(self, seconds=10):
  143. self.start_recording()
  144. for i in range(0, int(self.samplerate / self.chunksize
  145. * seconds)):
  146. self.process_recording()
  147. self.stop_recording()
  148. return self.recognize_recording()
  149. class NoRecordingError(Exception):
  150. pass