service_streamlit.py 8.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258
  1. # -*- coding: utf8 -*-
  2. from __future__ import print_function, absolute_import
  3. from tornado.ioloop import IOLoop
  4. from client import Client, ConnectionError
  5. from boxconfig import parse_config
  6. from dejavu.recognize import FilePerSecondRecognizer
  7. from endpoint import setup_endpoint
  8. from calibration import Calibrations
  9. from dejavu import Dejavu, CouldntDecodeError
  10. from multiprocessing import Process
  11. import logging as log
  12. import mutagen.mp3
  13. import math
  14. import sys
  15. import os
  16. from datetime import datetime
  17. import streamlit as st
  18. if sys.version_info >= (3, 0):
  19. from queue import Queue, Empty
  20. else:
  21. from Queue import Queue, Empty
  22. log.basicConfig(format='[%(asctime)s] [%(module)s] %(message)s', level=log.INFO)
  23. AUDIOS_PATH = '/tmp'
  24. AHEAD_TIME_AUDIO_TOLERANCE = 2 # second
  25. MAX_SEGMENT_THREADS = 4
  26. THRESHOLD = 10
  27. SEGMENTS_TOLERANCE_RATE = 0.6
  28. FALL_TOLERANCE_SEGMENTS = 1
  29. # THRESHOLD
  30. THRESHOLD_FIXED = 1
  31. THRESHOLD_AVERAGE = 2
  32. # Modos de procesamiento de queue
  33. # - QUEQUE_SINGLE: procesa solo un segmento a la vez
  34. # - QUEUE_THREAD: inicia un hilo para cada segmento
  35. # Por default se usará el threaded.
  36. # TODO: hacerlo configurable por medio de argumentos
  37. # de ejecución.
  38. QUEUE_SINGLE = 1
  39. QUEUE_THREAD = 2
  40. # Se pueden usar diferentes API'se
  41. # la de threading y la de multiprocessing.
  42. MultiAPI = Process
  43. config = parse_config()
  44. queue = Queue()
  45. client = Client(config['device_id'],
  46. config['apiSecret'])
  47. cloud_base_url = 'https://storage.googleapis.com/{}' \
  48. .format(config['bucket'])
  49. base_path = config.get("basepath", "/var/fourier")
  50. device_id = config['device_id']
  51. device_path = os.path.join(base_path, device_id)
  52. recognizer = FilePerSecondRecognizer
  53. # settings
  54. queue_mode = QUEUE_SINGLE
  55. threshold_mode = THRESHOLD_FIXED
  56. db_path = config.get('localDatabase', os.path.join(device_path, 'files.db'))
  57. #db = sqlite3.connect(db_path)
  58. cloud_cache = {}
  59. def process_segment(anuncios, grabaciones, audios=None, calibration=None):
  60. """ Procesa una hora de audio """
  61. # print(anuncio +" y "+ audio_busqueda)
  62. # date = dateutil.parser.parse(item['fecha'], ignoretz=True)
  63. segment_size = 5
  64. audio_length = 0
  65. anuncios = ["anuncio"]
  66. dejavu = Dejavu({"database_type": "mem"})
  67. try:
  68. for i in range(0, len(anuncios)):
  69. dejavu.fingerprint_file("anuncios/audio-{}.mp3".format(i,))
  70. except Exception as ex:
  71. log.error('[process_segment] cannot fingerprint: {}'.format(ex))
  72. audios_counter = 0
  73. results = []
  74. v = []
  75. for i in range(0, len(grabaciones)):
  76. path = "grabaciones/audio-{}.mp3".format(i,)
  77. values = []
  78. try:
  79. for match in dejavu.recognize(recognizer, path, segment_size):
  80. name = path
  81. results.append({
  82. 'confidence': match['confidence'],
  83. 'offset': match['offset'],
  84. 'name': name
  85. })
  86. values.append(str(match['confidence']))
  87. v.append(','.join(values))
  88. log.info('[process_segment] {0} {1}'.format(
  89. os.path.split(path)[-1],
  90. ','.join(values),
  91. ))
  92. st.text('[process_segment] {0} {1}'.format(
  93. os.path.split(path)[-1],
  94. ','.join(values),
  95. ))
  96. except CouldntDecodeError as ex:
  97. log.error('[process_segment] {}'.format(ex))
  98. try:
  99. encontrados = {}
  100. item_ids = []
  101. for i in item_ids:
  102. r = [result for result in results if result["name"] == i]
  103. encontrados[i] = find_repetitions(r, segments_needed=segments_needed, calibration=calibration,)
  104. #for id in encontrados:r
  105. # for e in encontrados[id]:
  106. # for i in item['elementos']:
  107. # if i['id'] == id and i['anuncio'] == e['ad']:
  108. # if 'encontrados' not in i:
  109. # i['encontrados'] = []
  110. # i['encontrados'].append(e)
  111. # break
  112. #item["archivos_perdidos"] = (12 - audios_counter) if audios_counter < 12 else 0
  113. except ConnectionError as ex:
  114. log.error('[process_segment] {}'.format(str(ex)))
  115. except UserWarning as warn:
  116. log.warning(str(warn))
  117. def find_repetitions(results, segments_needed=2, calibration=None):
  118. found_counter = 0
  119. found_down_counter = 0
  120. found_index = None
  121. expect_space = False
  122. expect_recover = False
  123. last_value_in_threshold_index = -1
  124. fall_tolerance = calibration['fallTolerance']
  125. found = []
  126. high = 100 # Obtener este valor desde un parámetro
  127. middle_high = 50 # Obtener este valor desde un parámetro
  128. segment_middle_needed = 2 # Obtener este valor desde un parámetro
  129. found_high = None
  130. found_middle_high = []
  131. if threshold_mode == THRESHOLD_FIXED:
  132. threshold = calibration['threshold']
  133. elif threshold_mode == THRESHOLD_AVERAGE:
  134. values = [x['confidence'] for x in results]
  135. threshold = math.ceil(float(sum(values)) / float(len(values)))
  136. if segments_needed < 1:
  137. segments_needed = 1
  138. for index, result in enumerate(results):
  139. #if result['confidence'] >= high:
  140. # if found_high is None:
  141. # found_high = index
  142. # elif result['confidence'] > results[found_high]['confidence']:
  143. # found_high = index
  144. #elif result['confidence'] >= middle_high:
  145. # found_middle_high.append(index)
  146. if not expect_space:
  147. if result['confidence'] >= threshold:
  148. found_counter += 1
  149. last_value_in_threshold_index = index
  150. if found_index is None:
  151. found_index = index
  152. if expect_recover:
  153. found_counter += found_down_counter
  154. expect_recover = False
  155. elif fall_tolerance:
  156. if not expect_recover:
  157. if last_value_in_threshold_index != -1:
  158. """ Solo cuando ya haya entrado por lo menos
  159. un valor en el rango del threshold, es cuando
  160. se podrá esperar un valor bajo """
  161. expect_recover = True
  162. found_down_counter += 1
  163. else:
  164. pass
  165. else:
  166. """ Si después de haber pasado tolerado 1 elemento
  167. vuelve a salir otro fuera del threshold continuo,
  168. entonces ya se da por perdido """
  169. found_counter = 0
  170. found_down_counter = 0
  171. found_index = None
  172. expect_recover = False
  173. else:
  174. found_counter = 0
  175. found_down_counter = 0
  176. found_index = None
  177. expect_recover = False
  178. # Aquí veremos si hay un valor alto
  179. #if found_high is not None:
  180. # found_row = results[found_high]
  181. # found.append(found_row)
  182. #elif len(found_middle_high) >= segment_middle_needed:
  183. # found_row = results[found_middle_high[0]]
  184. # found.append(found_row)
  185. #found_high = None
  186. #found_middle_high = []
  187. else:
  188. if result['confidence'] <= threshold:
  189. expect_space = False
  190. if found_counter >= segments_needed:
  191. found_row = results[found_index]
  192. found.append(found_row)
  193. found_counter = 0
  194. expect_space = True
  195. #found_high = None
  196. #found_middle_high = []
  197. return found
  198. def limpiar_archivos():
  199. anuncios = os.listdir('anuncios/')
  200. for audio in anuncios:
  201. os.remove('anuncios/{}'.format(audio,))
  202. grabaciones = os.listdir('grabaciones/')
  203. for audio in grabaciones:
  204. os.remove('grabaciones/{}'.format(audio))
  205. def main():
  206. if st.button("Limpiar archivos"):
  207. limpiar_archivos()
  208. anuncios = st.file_uploader("Elige los anuncios", accept_multiple_files=True, type="mp3")
  209. for i in range(0, len(anuncios)):
  210. with open("anuncios/audio-{}.mp3".format(i,), "wb") as audio:
  211. audio.write(anuncios[i].getvalue())
  212. grabaciones = st.file_uploader("Elige la grabación", accept_multiple_files=True, type="mp3")
  213. for i in range(0, len(grabaciones)):
  214. with open("grabaciones/audio-{}.mp3".format(i,), "wb") as audio:
  215. audio.write(grabaciones[i].getvalue())
  216. if st.button("Comparar"):
  217. process_segment(anuncios, grabaciones)
  218. main()