In [1]:
import warnings
warnings.simplefilter("ignore", UserWarning)

from __future__ import print_function, absolute_import
from tornado.ioloop import IOLoop
from client import Client, ConnectionError
from boxconfig import parse_config
from dejavu.recognize import FilePerSecondRecognizer
from dejavu import Dejavu, CouldntDecodeError
from endpoint import setup_endpoint
from multiprocessing import Process
import logging as log
import requests
import dateutil
import math
import time
import os

from queue import Queue, Empty

In [2]:
# Base directory for downloaded files; descargar_anuncio() and
# descargar_media() create their 'ads' and 'fourier/<box>/<station>'
# subdirectories under it.
PATH = '/tmp'
# NOTE(review): the constants below are not referenced by any visible cell.
AHEAD_TIME_AUDIO_TOLERANCE = 2 # seconds
MAX_SEGMENT_THREADS = 4
THRESHOLD = 10
SEGMENTS_TOLERANCE_RATE = 0.6
FALL_TOLERANCE_SEGMENTS = 1

In [3]:
# Values for `threshold_mode`, read by encontrar_resultados():
# FIXED takes the threshold from the calibration, AVERAGE derives it from
# the mean confidence of the results being scanned.
THRESHOLD_FIXED = 1
THRESHOLD_AVERAGE = 2

In [4]:
# Values for `queue_mode`.
# NOTE(review): `queue_mode` is assigned but never read in the visible cells.
QUEUE_SINGLE = 1
QUEUE_THREAD = 2

In [5]:
# Alias for multiprocessing.Process.
# NOTE(review): not used by any visible cell.
MultiAPI = Process

In [6]:
# Device configuration; the visible cells read its 'bucket' and 'device_id' keys.
config = parse_config()
# NOTE(review): this queue is not used by any visible cell.
queue = Queue()

# Base URL that ads and station audio are downloaded from.
cloud_base_url = 'https://storage.googleapis.com/{}' \
 .format(config['bucket'])
# Recognizer class handed to Dejavu.recognize() in the matching cells.
recognizer = FilePerSecondRecognizer

In [7]:
# Active modes. `threshold_mode` selects how encontrar_resultados() obtains
# its threshold; `queue_mode` is not read by any visible cell.
queue_mode = QUEUE_SINGLE
threshold_mode = THRESHOLD_FIXED

In [8]:
def obt_siguiente_trabajo(timeout=30):
    """Fetch the next pending calendar job for this device.

    Args:
        timeout: Seconds to wait for the API before giving up. Without a
            timeout the request may block indefinitely.

    Returns:
        The decoded JSON body of the response.
    """
    url = 'https://api.fourier.audio/na/calendario/pendiente?id=%s' % (config['device_id'],)
    response = requests.get(url, timeout=timeout)
    # Decode the body once and reuse it for both the log line and the result.
    trabajo = response.json()
    log.info(trabajo)
    return trabajo

In [9]:
def descargar_anuncio(ad_path, timeout=60):
    """Download an ad file into the local cache and return its path.

    The file is stored as ``<PATH>/ads/<basename of ad_path>``. When that
    file already exists it is returned without contacting the server.

    Args:
        ad_path: Path of the ad, appended to ``cloud_base_url``.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The local path of the ad, or ``None`` when the server did not answer
        with HTTP 200.
    """
    anuncio = os.path.basename(ad_path)
    path = os.path.join(PATH, 'ads')
    os.makedirs(path, exist_ok=True)
    ruta_anuncio = os.path.join(path, anuncio)

    if os.path.isfile(ruta_anuncio):
        return ruta_anuncio

    url = '{}/{}'.format(cloud_base_url, ad_path)
    response = requests.get(url, timeout=timeout)

    # TODO: raise an alert when the response is not 200
    if response.status_code != 200:
        print("Error al descargar")
        print(response)
        return None

    # Write under a temporary name and rename afterwards, so an interrupted
    # write never leaves a truncated file that the cache check above would
    # later accept as a complete download.
    ruta_parcial = ruta_anuncio + '.part'
    with open(ruta_parcial, "wb") as fp:
        fp.write(response.content)
    os.replace(ruta_parcial, ruta_anuncio)
    return ruta_anuncio


In [10]:
def descargar_media(box, station, media, timeout=60):
    """Download a station audio file into the local cache and return its path.

    The file is stored as ``<PATH>/fourier/<box>/<station>/<basename of media>``.
    When that file already exists it is returned without contacting the
    server.

    Args:
        box: First component of the remote reference.
        station: Second component of the remote reference.
        media: Path of the audio file below ``<box>/<station>``.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The local path of the file, or ``None`` when the server did not
        answer with HTTP 200.
    """
    ref = '{}/{}/{}'.format(box, station, media)
    nombre = os.path.basename(ref)
    path = os.path.join(PATH, 'fourier', box, station)
    os.makedirs(path, exist_ok=True)
    out_file = os.path.join(path, nombre)

    if os.path.isfile(out_file):
        return out_file

    # The whole reference is sent as a single path component, so '/' and '+'
    # are percent-encoded.
    filename = ref.replace("/", "%2F").replace("+", "%2B")
    url = '{}/{}'.format(cloud_base_url, filename)
    response = requests.get(url, timeout=timeout)

    if response.status_code != 200:
        print("Error al descargar")
        print(response)
        return None

    # Write under a temporary name and rename afterwards, so an interrupted
    # write never leaves a truncated file that the cache check above would
    # later accept as a complete download.
    archivo_parcial = out_file + '.part'
    with open(archivo_parcial, "wb") as fp:
        fp.write(response.content)
    os.replace(archivo_parcial, out_file)
    return out_file


In [11]:
def obt_calibracion(calibracion):
    """Return the calibration to use, filling missing keys with defaults.

    Args:
        calibracion: Container that may hold any of the keys ``threshold``,
            ``tolerance``, ``fallTolerance`` and ``segmentSize``. ``None`` is
            treated as empty. Any other key is ignored.

    Returns:
        A new dict holding exactly those four keys.
    """
    default = {
        'threshold': 12,
        'tolerance': 0.8,
        'fallTolerance': 1,
        'segmentSize': 5,
    }

    if calibracion is None:
        return default

    # Only the known keys are copied over; membership is tested with `in` so
    # an empty list (as well as a dict) is accepted.
    recibidos = {
        clave: calibracion[clave]
        for clave in default
        if clave in calibracion
    }
    default.update(recibidos)
    return default

In [12]:
def encontrar_resultados(resultados, segments_needed=4, calibration=None):
    """Locate runs of results whose confidence reaches the threshold.

    The results are scanned in order. Every result at or above the threshold
    adds one to a run counter; when the counter reaches ``segments_needed``
    a detection is recorded and scanning waits for a result at or below the
    threshold before a new run can start. With a truthy ``fallTolerance`` a
    single result below the threshold does not break the run and is credited
    to it if the next result recovers.

    Args:
        resultados: Sequence of dicts, each with a ``'confidence'`` value.
        segments_needed: Counter value that makes up one detection. Values
            below 1 are treated as 1.
        calibration: Mapping with ``'fallTolerance'`` and, when
            ``threshold_mode`` is ``THRESHOLD_FIXED``, ``'threshold'``.
            Required despite the ``None`` default.

    Returns:
        A list with, for every detection, the row of ``resultados`` at which
        the detected run started.

    Raises:
        TypeError: If ``calibration`` is ``None``.
        ValueError: If ``threshold_mode`` holds an unknown value.
    """
    if calibration is None:
        raise TypeError('calibration is required')
    fall_tolerance = calibration['fallTolerance']

    # Nothing to scan; this also keeps the average below from dividing by zero.
    if not resultados:
        return []

    if threshold_mode == THRESHOLD_FIXED:
        threshold = calibration['threshold']
    elif threshold_mode == THRESHOLD_AVERAGE:
        values = [x['confidence'] for x in resultados]
        threshold = math.ceil(float(sum(values)) / float(len(values)))
    else:
        raise ValueError('unknown threshold_mode: {!r}'.format(threshold_mode))

    if segments_needed < 1:
        segments_needed = 1

    found = []
    found_counter = 0
    found_down_counter = 0
    found_index = None
    expect_space = False
    expect_recover = False
    # True once any result has reached the threshold; dips seen before that
    # point are ignored instead of starting a tolerated fall.
    seen_in_threshold = False

    for index, result in enumerate(resultados):
        confidence = result['confidence']

        if expect_space:
            # After a detection, wait for the signal to drop before counting.
            if confidence <= threshold:
                expect_space = False

        elif confidence >= threshold:
            found_counter += 1
            seen_in_threshold = True
            if found_index is None:
                found_index = index
            if expect_recover:
                # Credit the tolerated dip to the run, then clear it so the
                # same dip is not credited again on a later recovery.
                found_counter += found_down_counter
                found_down_counter = 0
                expect_recover = False

        elif fall_tolerance and not expect_recover:
            # First result below the threshold: tolerate it for now.
            if seen_in_threshold:
                expect_recover = True
                found_down_counter += 1

        else:
            # Second consecutive low result, or no tolerance: drop the run.
            found_counter = 0
            found_down_counter = 0
            found_index = None
            expect_recover = False

        if found_counter >= segments_needed:
            found.append(resultados[found_index])
            found_counter = 0
            # Forget the start of this run so the next detection reports its
            # own first row rather than this one again.
            found_index = None
            expect_space = True

    return found

In [13]:
def enviar_resultados(item, timeout=30):
    """POST a processed job to the results endpoint.

    Args:
        item: JSON-serialisable job, sent as the request body.
        timeout: Seconds to wait for the API before giving up. Without a
            timeout the request may block indefinitely.

    Returns:
        The response object returned by ``requests.post``.
    """
    url = 'https://api.fourier.audio/v1/calendario/resultado'
    return requests.post(url, json=item, timeout=timeout)

In [114]:
# Fetch the next pending job; every cell below works on this dict.
pendiente = obt_siguiente_trabajo()

In [28]:
# Process the pending item

# 'origen' is later passed as the `box` argument of descargar_media().
ciudad = pendiente['origen']
estacion = pendiente['estacion']
calibracion = obt_calibracion(pendiente['calibracion'])
tolerance = calibracion['tolerance']
tamano_segmento = calibracion['segmentSize']
# NOTE(review): hard-coded ad length, presumably in seconds -- confirm.
longitud_audio = 30

In [29]:
# Download every ad of the job and index the job elements.
#   anuncios  -- local paths of the ads that could be downloaded
#   id_by_ad  -- element 'anuncio' value -> element id
#   item_ids  -- distinct element ids, in first-seen order
anuncios = []
id_by_ad = {}
item_ids = []
try:
    for elemento in pendiente["elementos"]:
        id_by_ad[elemento['anuncio']] = elemento['id']
        if elemento['id'] not in item_ids:
            item_ids.append(elemento['id'])

        # A failing download only skips this ad; the remaining elements are
        # still indexed and downloaded.
        try:
            anuncio = descargar_anuncio(elemento["ruta"])
        except Exception as err:
            print('[process_segment] [{}] {}'.format(estacion, err))
            continue

        if anuncio is not None:
            anuncios.append(anuncio)
        else:
            print('[process_segment] ad file missing')

except Exception as err:
    print('[process_segment] [{}] {}'.format(estacion, err))

In [30]:
# Download the station audio of the job.
# `media` collects (local path, 'fecha', 'timestamp') for every file that
# could be downloaded.
media = []
try:
    for entrada in pendiente["media"]:
        # A failing download only skips this file; the remaining ones are
        # still attempted.
        try:
            archivo = descargar_media(ciudad, estacion, entrada["ruta"])
        except Exception as err:
            print(err)
            continue

        if archivo is not None:
            media.append((archivo, entrada["fecha"], entrada["timestamp"]))

except Exception as err:
    print(err)

In [None]:
# Fingerprint every downloaded ad into a fresh in-memory Dejavu instance.
dejavu = Dejavu({"database_type": "mem"})
try:
    for anuncio in anuncios:
        dejavu.fingerprint_file(anuncio)
except Exception as ex:
    # Report the exception bound by this handler.
    print(ex)

In [None]:
# Turn the symbolic segment sizes 'ceil' and 'integer' into numbers; any
# other value is used as it is.
if tamano_segmento == 'ceil':
    tamano_segmento = 5 * int(math.ceil(longitud_audio / 5))
elif tamano_segmento == 'integer':
    tamano_segmento = int(longitud_audio)

# Segments that fit in the audio length, scaled by the tolerance; both
# steps round to the nearest integer.
segmentos_necesarios = int(round(
    int(round(float(longitud_audio) / float(tamano_segmento))) * tolerance
))

In [None]:
anuncios_en_paralelo = 5 # This value should come from the PHP side

dejavu = None
cont_media = len(media)
cont_anuncios = len(anuncios)
resultados = []
v = []
try:
    # Compare the media against the ads in batches: every batch gets its own
    # in-memory Dejavu instance holding at most `anuncios_en_paralelo` ads.
    for inicio in range(0, cont_anuncios, anuncios_en_paralelo):
        dejavu = Dejavu({"database_type": "mem"})
        print("Nueva comparación")
        lote = anuncios[inicio:inicio + anuncios_en_paralelo]
        for x, anuncio in enumerate(lote, start=inicio):
            print("Agregando anuncio %s %s" % (x, os.path.split(anuncio)[-1],))
            dejavu.fingerprint_file(anuncio)

        for ruta, fecha, ts in media:
            values = []
            try:
                for match in dejavu.recognize(recognizer, ruta, tamano_segmento):
                    ad = None
                    name = None
                    try:
                        ad = match['name']
                        # Use the element id when the matched name is a key
                        # of id_by_ad, otherwise keep the matched name.
                        name = id_by_ad.get(ad, ad)
                    except KeyError:
                        pass

                    resultados.append({
                        'ad': ad,
                        'confidence': match['confidence'],
                        'timestamp': ts,
                        'offset': match['offset'],
                        'name': name
                    })
                    values.append(str(match['confidence']))
                    # NOTE(review): advances by a fixed 5 regardless of
                    # tamano_segmento -- confirm this is intended.
                    ts = ts + 5

                v.append(','.join(values))
                print('[process_segment] [{2}] {0} {1}'.format(
                    os.path.split(ruta)[-1],
                    ','.join(values),
                    estacion,
                ))

            except CouldntDecodeError as ex:
                log.error('[process_segment] {}'.format(ex))

    # Aggregate once, after every batch has been compared.
    # NOTE(review): the flattened export does not show whether this section
    # originally ran inside the batch loop -- confirm.
    try:
        encontrados = {}
        for item_id in item_ids:
            resultado = [r for r in resultados if r["name"] == item_id]
            # The number of segments required is segmentos_necesarios, not
            # the segment size.
            encontrados[item_id] = encontrar_resultados(
                resultado,
                segments_needed=segmentos_necesarios,
                calibration=calibracion,
            )

        for item_id, hallazgos in encontrados.items():
            for e in hallazgos:
                for elemento in pendiente['elementos']:
                    if elemento['id'] == item_id and elemento['anuncio'] == e['ad']:
                        elemento.setdefault('encontrados', []).append(e)
                        break

        pendiente["archivos_perdidos"] = 0
        pendiente["total_archivos"] = cont_media
        # response = enviar_resultados(pendiente)
    except ConnectionError as ex:
        log.error('[process_segment] [{}] {}'.format(estacion, ex))
    except UserWarning as warn:
        log.warning('[process_segment] [{}] {}'.format(estacion, warn))

except Exception as ex:
    # Report the exception bound by this handler.
    print(ex)

In [None]:
# Run the detection for every element id, attach the detections to the
# matching job elements and send the job back to the API.
try:
    encontrados = {}
    for item_id in item_ids:
        filas = [result for result in resultados if result["name"] == item_id]
        encontrados[item_id] = encontrar_resultados(
            filas,
            segments_needed=segmentos_necesarios,
            calibration=calibracion,
        )

    for item_id, hallazgos in encontrados.items():
        for e in hallazgos:
            for elemento in pendiente['elementos']:
                if elemento['id'] == item_id and elemento['anuncio'] == e['ad']:
                    elemento.setdefault('encontrados', []).append(e)
                    break

    pendiente["archivos_perdidos"] = 0
    pendiente["total_archivos"] = cont_media
    response = enviar_resultados(pendiente)
except ConnectionError as ex:
    # Keep the notebook running, but leave a trace that nothing was sent.
    # NOTE(review): this ConnectionError is the one imported from `client`;
    # confirm it is what enviar_resultados() raises on network failure.
    log.error('[process_segment] [{}] {}'.format(estacion, ex))
except UserWarning as warn:
    log.warning('[process_segment] [{}] {}'.format(estacion, warn))


In [115]:
# Reverse matching: fingerprint the station audio, then look every ad up in
# it. `resultados` maps the "id" of a match to one summary entry.
dejavu = None
resultados = {}
try:
    dejavu = Dejavu({"database_type": "mem"})
    try:
        for ruta, fecha, ts in media:
            log.info("Huellando %s" % (ruta,))
            # The media timestamp is passed along with the file; presumably
            # it comes back as match["name"] below -- confirm.
            dejavu.fingerprint_file(ruta, ts)
    except Exception as ex:
        # Logged with traceback at error level so it is visible under the
        # default logging configuration.
        log.exception(ex)

    for anuncio in anuncios:
        log.info("Buscando anuncio %s" % (anuncio,))
        nombre_anuncio = os.path.split(anuncio)[-1]
        for match in dejavu.recognize(recognizer, anuncio, 5):
            if "id" not in match:
                continue

            # Discard weak matches.
            if match["confidence"] < 50:
                continue

            match["match_time"] = None
            entrada = {
                "id": id_by_ad[nombre_anuncio],
                "anuncio": anuncio,
                "timestamp": match["name"] + int(match['offset_seconds']),
                "confianza": match["confidence"],
                "longitud": match["length"],
                "desfase_segundos": match["offset_seconds"]
            }

            clave = match["id"]
            if clave in resultados:
                # Same match id seen again: accumulate length and confidence
                # on the entry already stored.
                resultados[clave]["longitud"] += entrada["longitud"]
                resultados[clave]["confianza"] += entrada["confianza"]
            else:
                resultados[clave] = entrada

except Exception as ex:
    log.exception(ex)

In [116]:
# Attach every summary entry to the first job element with the same id and
# ad name.
# NOTE(review): re-running this cell appends the same entries again.
for e in resultados.values():
    # Compare by file name, independent of the download directory.
    nombre_anuncio = os.path.basename(e['anuncio'])
    for elemento in pendiente['elementos']:
        if elemento['id'] == e['id'] and elemento['anuncio'] == nombre_anuncio:
            elemento.setdefault('encontrados', []).append(e)
            print(e)
            break

{'id': 46057082, 'anuncio': '/tmp/ads/-MlReJO2cHvAi7XG40mw', 'timestamp': 1630504399, 'confianza': 980, 'longitud': 30000, 'desfase_segundos': 199.39439}


In [63]:
import json

In [118]:
# Send the job, with the 'encontrados' entries attached so far, to the API.
enviar_resultados(pendiente)



In [117]:
# Show the job as JSON for inspection.
# NOTE(review): execution count 117 precedes the send cell (118) although
# this cell is placed after it.
json.dumps(pendiente)

'{"idCampana": 1768, "idCampanaPauta": 1166150, "ciudad": "prueba", "idEstacion": 10, "origen": "g2yJhO8z", "fecha": "2021-09-01 06:00:00", "zonaHoraria": "America/Hermosillo", "estacion": "101_7_SON_OBR", "media": [{"ruta": "2021/09/01/2021-09-01T06-00-00-0700.mp3", "fecha": "2021-09-01 06:00:00", "timestamp": 1630501200}, {"ruta": "2021/09/01/2021-09-01T06-05-00-0700.mp3", "fecha": "2021-09-01 06:05:00", "timestamp": 1630501500}, {"ruta": "2021/09/01/2021-09-01T06-10-00-0700.mp3", "fecha": "2021-09-01 06:10:00", "timestamp": 1630501800}, {"ruta": "2021/09/01/2021-09-01T06-15-00-0700.mp3", "fecha": "2021-09-01 06:15:00", "timestamp": 1630502100}, {"ruta": "2021/09/01/2021-09-01T06-20-00-0700.mp3", "fecha": "2021-09-01 06:20:00", "timestamp": 1630502400}, {"ruta": "2021/09/01/2021-09-01T06-25-00-0700.mp3", "fecha": "2021-09-01 06:25:00", "timestamp": 1630502700}, {"ruta": "2021/09/01/2021-09-01T06-30-00-0700.mp3", "fecha": "2021-09-01 06:30:00", "timestamp": 1630503000}, {"ruta": "2021

In [94]:
# Display the match summary.
# NOTE(review): the saved output shows a 'fecha' key while the matching cell
# writes 'timestamp', so this output appears to come from an earlier run.
resultados

{'7798E7A7396865717C5FF58D40533B17D05D90DC': {'id': 46057064,
 'anuncio': '/tmp/ads/-MlRe9DwDJVtYlLS-6PS',
 'fecha': 1630503000,
 'confianza': 78,
 'longitud': 5000,
 'desfase_segundos': 285.21535},
 '373A10DF13ADBE563AB4A6F39D9DF0C73AA537FB': {'id': 46057082,
 'anuncio': '/tmp/ads/-MlReJO2cHvAi7XG40mw',
 'fecha': 1630504200,
 'confianza': 980,
 'longitud': 30000,
 'desfase_segundos': 199.39439}}

In [None]:
# NOTE(review): `item` is not defined in any visible cell; this fails on a
# fresh kernel. Possibly `pendiente` was meant -- confirm or remove.
item["elementos"]

In [None]:
# NOTE(review): neither `cloud_download_file` nor `item` is defined in any
# visible cell; this fails on a fresh kernel -- confirm or remove.
cloud_download_file(item["origen"], item["estacion"], item['archivos'][0]['filename'])

In [None]:
# NOTE(review): `AUDIOS_PATH` is not defined in any visible cell (the
# notebook defines `PATH`), and 'ciudad' / 'estacion' are literal strings
# here, not the variables of the same name -- confirm or remove.
path = os.path.join(AUDIOS_PATH, 'fourier', 'ciudad', 'estacion')

In [None]:
# Display the value of `path`.
path

In [None]:
# Create the directory, including parents; no error if it already exists.
os.makedirs(path, exist_ok=True)