service_streamlit.py 7.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245
  1. # -*- coding: utf8 -*-
  2. from __future__ import print_function, absolute_import
  3. from tornado.ioloop import IOLoop
  4. from client import Client, ConnectionError
  5. from boxconfig import parse_config
  6. from dejavu.recognize import FilePerSecondRecognizer
  7. from endpoint import setup_endpoint
  8. from calibration import Calibrations
  9. from dejavu import Dejavu, CouldntDecodeError
  10. from multiprocessing import Process
  11. import logging as log
  12. import mutagen.mp3
  13. import math
  14. import sys
  15. import os
  16. import time
  17. from datetime import datetime
  18. import streamlit as st
  19. st.set_page_config(layout="wide")
  20. if sys.version_info >= (3, 0):
  21. from queue import Queue, Empty
  22. else:
  23. from Queue import Queue, Empty
  24. log.basicConfig(format='[%(asctime)s] [%(module)s] %(message)s', level=log.INFO)
  25. AUDIOS_PATH = '/tmp'
  26. AHEAD_TIME_AUDIO_TOLERANCE = 2 # second
  27. MAX_SEGMENT_THREADS = 4
  28. THRESHOLD = 10
  29. SEGMENTS_TOLERANCE_RATE = 0.6
  30. FALL_TOLERANCE_SEGMENTS = 1
  31. # THRESHOLD
  32. THRESHOLD_FIXED = 1
  33. THRESHOLD_AVERAGE = 2
  34. # Modos de procesamiento de queue
  35. # - QUEQUE_SINGLE: procesa solo un segmento a la vez
  36. # - QUEUE_THREAD: inicia un hilo para cada segmento
  37. # Por default se usará el threaded.
  38. # TODO: hacerlo configurable por medio de argumentos
  39. # de ejecución.
  40. QUEUE_SINGLE = 1
  41. QUEUE_THREAD = 2
  42. # Se pueden usar diferentes API'se
  43. # la de threading y la de multiprocessing.
  44. MultiAPI = Process
  45. config = parse_config()
  46. queue = Queue()
  47. client = Client(config['device_id'],
  48. config['apiSecret'])
  49. cloud_base_url = 'https://storage.googleapis.com/{}' \
  50. .format(config['bucket'])
  51. base_path = config.get("basepath", "/var/fourier")
  52. device_id = config['device_id']
  53. device_path = os.path.join(base_path, device_id)
  54. recognizer = FilePerSecondRecognizer
  55. # settings
  56. queue_mode = QUEUE_SINGLE
  57. threshold_mode = THRESHOLD_FIXED
  58. db_path = config.get('localDatabase', os.path.join(device_path, 'files.db'))
  59. #db = sqlite3.connect(db_path)
  60. cloud_cache = {}
  61. def process_segment(anuncios, grabaciones, audios=None, calibration=None):
  62. segment_size = int(calibration['ss'])
  63. audio_length = 0
  64. dejavu = Dejavu({"database_type": "mem"})
  65. try:
  66. for i in range(0, len(anuncios)):
  67. path = "anuncios/{}".format(anuncios[i].name,)
  68. dejavu.fingerprint_file(path)
  69. except Exception as ex:
  70. log.error('[process_segment] cannot fingerprint: {}'.format(ex))
  71. audios_counter = 0
  72. results = []
  73. v = []
  74. st.subheader("Resultados de la comparación")
  75. for i in range(0, len(grabaciones)):
  76. path = "grabaciones/{}".format(grabaciones[i].name,)
  77. values = []
  78. try:
  79. seconds = 0
  80. for match in dejavu.recognize(recognizer, path, segment_size):
  81. name = ""
  82. if "name" in match:
  83. name = match["name"]
  84. results.append({
  85. "path": path,
  86. "name": name,
  87. "confidence": match["confidence"],
  88. "offset": match["offset"],
  89. "offset_seconds": seconds
  90. })
  91. values.append(str(match['confidence']))
  92. seconds += segment_size
  93. v.append(','.join(values))
  94. log.info('{0} {1}'.format(
  95. grabaciones[i].name,
  96. ','.join(values),
  97. ))
  98. st.text('{0} {1}'.format(
  99. grabaciones[i].name,
  100. ','.join(values),
  101. ))
  102. except CouldntDecodeError as ex:
  103. log.error('[process_segment] {}'.format(ex))
  104. try:
  105. encontrados = find_repetitions(results, segments_needed=int(calibration['sn']), calibration=calibration)
  106. st.subheader("Encontrados")
  107. st.write(encontrados)
  108. except ConnectionError as ex:
  109. log.error('[process_segment] {}'.format(str(ex)))
  110. except UserWarning as warn:
  111. log.warning(str(warn))
  112. def find_repetitions(results, segments_needed=2, calibration=None):
  113. found_counter = 0
  114. found_down_counter = 0
  115. found_index = None
  116. expect_space = False
  117. expect_recover = False
  118. last_value_in_threshold_index = -1
  119. fall_tolerance = calibration['tf']
  120. found = []
  121. last_found = None
  122. if threshold_mode == THRESHOLD_FIXED:
  123. threshold = int(calibration['th'])
  124. elif threshold_mode == THRESHOLD_AVERAGE:
  125. values = [x['confidence'] for x in results]
  126. threshold = math.ceil(float(sum(values)) / float(len(values)))
  127. if segments_needed < 1:
  128. segments_needed = 1
  129. for index, result in enumerate(results):
  130. if not expect_space:
  131. if result['confidence'] >= threshold:
  132. found_counter += 1
  133. last_value_in_threshold_index = index
  134. if found_index is None:
  135. found_index = index
  136. if expect_recover:
  137. found_counter += found_down_counter
  138. expect_recover = False
  139. elif fall_tolerance:
  140. if not expect_recover:
  141. if last_value_in_threshold_index != -1:
  142. expect_recover = True
  143. found_down_counter += 1
  144. else:
  145. pass
  146. else:
  147. found_counter = 0
  148. found_down_counter = 0
  149. found_index = None
  150. expect_recover = False
  151. else:
  152. found_counter = 0
  153. found_down_counter = 0
  154. found_index = None
  155. expect_recover = False
  156. else:
  157. if result['confidence'] <= threshold:
  158. expect_space = False
  159. if found_counter >= segments_needed and last_found != found_index:
  160. found_row = results[found_index]
  161. found.append(found_row)
  162. last_found = found_index
  163. found_counter = 0
  164. expect_space = True
  165. return found
  166. def limpiar_archivos():
  167. anuncios = os.listdir('anuncios/')
  168. for audio in anuncios:
  169. if audio.find('.mp3') > -1:
  170. os.remove('anuncios/{}'.format(audio,))
  171. grabaciones = os.listdir('grabaciones/')
  172. for audio in grabaciones:
  173. if audio.find('.mp3') > -1:
  174. os.remove('grabaciones/{}'.format(audio))
  175. def main():
  176. st.subheader('Subir archivos para comparar')
  177. u1, u2 = st.beta_columns([3, 3])
  178. anuncios = u1.file_uploader("Anuncios", accept_multiple_files=True, type="mp3")
  179. for i in range(0, len(anuncios)):
  180. with open("anuncios/{}".format(anuncios[i].name,), "wb") as audio:
  181. audio.write(anuncios[i].getvalue())
  182. grabaciones = u2.file_uploader("Grabaciones", accept_multiple_files=True, type="mp3")
  183. grabaciones.sort(key=lambda x: x.name)
  184. for i in range(0, len(grabaciones)):
  185. with open("grabaciones/{}".format(grabaciones[i].name,), "wb") as audio:
  186. audio.write(grabaciones[i].getvalue())
  187. if st.button("Borrar archivos anteriores"):
  188. limpiar_archivos()
  189. st.subheader('Parámetros de calibración')
  190. col1, col2, col3, col4 = st.beta_columns([1,1,1,1])
  191. umbral = col1.text_input("Umbral", 12)
  192. segmentos_necesarios = col2.text_input("Sementos necesarios", 4)
  193. caida = col3.text_input("Tolerancia a caida", 1)
  194. segmento = col4.text_input("Tamaño del Segmento", 5)
  195. calibracion = {
  196. "th": umbral,
  197. "tf": caida,
  198. "sn": segmentos_necesarios,
  199. "ss": segmento
  200. }
  201. if st.button("Comparar"):
  202. process_segment(anuncios, grabaciones, calibration=calibracion)
  203. main()