|
@@ -7,6 +7,7 @@ from fourier.boxconfig import parse_config
|
|
|
from fourier.dejavu.recognize import FilePerSecondRecognizer
|
|
|
from datetime import datetime, timedelta
|
|
|
from ondemand.endpoint import setup_endpoint
|
|
|
+from ondemand.calibration import Calibrations
|
|
|
from fourier.dejavu import Dejavu
|
|
|
from firebase_admin import credentials
|
|
|
from firebase_admin import db as fbdb
|
|
@@ -18,6 +19,7 @@ from argparse import ArgumentParser
|
|
|
import logging as log
|
|
|
import firebase_admin
|
|
|
import mutagen.mp3
|
|
|
+import OpenSSL.SSL
|
|
|
import requests
|
|
|
import dateutil
|
|
|
import sqlite3
|
|
@@ -31,7 +33,7 @@ if sys.version_info >= (3, 0):
|
|
|
else:
|
|
|
from Queue import Queue, Empty
|
|
|
|
|
|
-log.basicConfig(format='[%(asctime)s] %(message)s', level=log.INFO)
|
|
|
+log.basicConfig(format='[%(asctime)s] [%(module)s] %(message)s', level=log.INFO)
|
|
|
|
|
|
AUDIOS_PATH = '/tmp'
|
|
|
AHEAD_TIME_AUDIO_TOLERANCE = 2 # second
|
|
@@ -70,9 +72,8 @@ dejavu = Dejavu({"database_type":"mem"})
|
|
|
device_id = config['device_id']
|
|
|
device_path = os.path.join(base_path, device_id)
|
|
|
recognizer = FilePerSecondRecognizer
|
|
|
-calibrations = {}
|
|
|
device_ref = fbdb.reference('devices').child(config['device_id'])
|
|
|
-calibrations_ref = device_ref.child('calibrations')
|
|
|
+calibrations = Calibrations(config['device_id'], client=client)
|
|
|
|
|
|
# settings
|
|
|
queue_mode = QUEUE_THREAD
|
|
@@ -113,6 +114,7 @@ def feed_queue():
|
|
|
""" Errores desconocidos """
|
|
|
log.error('[feed_queue] {}'.format(ex))
|
|
|
loop.add_timeout(time.time() + 60, feed_queue)
|
|
|
+ raise ex
|
|
|
|
|
|
def process_queue():
|
|
|
""" Try to the next item in a queue and start
|
|
@@ -141,18 +143,33 @@ def process_queue_with_threads():
|
|
|
item = queue.get(False)
|
|
|
station = item['station']
|
|
|
date = dateutil.parser.parse(item['date'])
|
|
|
+ calibration = calibrations.get(station)
|
|
|
+
|
|
|
+ audios = [f for f in iterate_audios(
|
|
|
+ date, station,
|
|
|
+ calibration=calibration
|
|
|
+ )]
|
|
|
|
|
|
thread = MultiAPI(target=process_segment,
|
|
|
args=(item,),
|
|
|
kwargs={
|
|
|
- 'audios': [f for f in iterate_audios(date, station)]
|
|
|
+ 'audios': audios,
|
|
|
+ 'calibration': calibration,
|
|
|
}
|
|
|
)
|
|
|
threads[index] = thread
|
|
|
thread.start()
|
|
|
+
|
|
|
except Empty:
|
|
|
is_drained = True
|
|
|
|
|
|
+ except Exception as err:
|
|
|
+ log.error('[process_queue_with_threads] [{}] {}'.format(
|
|
|
+ station,
|
|
|
+ err,
|
|
|
+ ))
|
|
|
+ continue
|
|
|
+
|
|
|
elif not t.is_alive():
|
|
|
threads[index] = None
|
|
|
|
|
@@ -163,42 +180,64 @@ def process_queue_with_threads():
|
|
|
log.info('Finished thread processing')
|
|
|
loop.add_callback(feed_queue)
|
|
|
|
|
|
-def process_segment(item, audios=None):
|
|
|
+def process_segment(item, audios=None, calibration=None):
|
|
|
""" Procesa una hora de audio """
|
|
|
|
|
|
station = item['station']
|
|
|
- date = dateutil.parser.parse(item['date'])
|
|
|
|
|
|
- log.info('processing segment: {}'.format(item))
|
|
|
+ if not calibration:
|
|
|
+ calibration = calibrations.get(station)
|
|
|
|
|
|
- # 0. Obtener la información de calibración
|
|
|
- calibration = calibrations_ref.child(station)
|
|
|
- print(calibration)
|
|
|
+ tolerance = calibration['tolerance']
|
|
|
+ date = dateutil.parser.parse(item['date'])
|
|
|
+ segment_size = calibration['segmentSize']
|
|
|
+ audio_length = 0
|
|
|
+
|
|
|
+ log.info('[process_segment] (th: {}, tl: {}, ft: {}, ss: {}, ho: {}) {}'\
|
|
|
+ .format(
|
|
|
+ calibration['threshold'],
|
|
|
+ calibration['tolerance'],
|
|
|
+ calibration['fallTolerance'],
|
|
|
+ calibration['segmentSize'],
|
|
|
+ calibration['hourlyOffset'],
|
|
|
+ item,
|
|
|
+ )
|
|
|
+ )
|
|
|
|
|
|
# 1. obtener el audio desde firebase
|
|
|
# y calcular su fingerprint.
|
|
|
- filename, md5hash = cloud_download(ad_key=item['ad'])
|
|
|
- if not filename:
|
|
|
- log.info('ad file missing')
|
|
|
+ try:
|
|
|
+ filename, md5hash = cloud_download(ad_key=item['ad'])
|
|
|
+ if not filename:
|
|
|
+ log.info('[process_segment] ad file missing')
|
|
|
+ return
|
|
|
+ except Exception as err:
|
|
|
+ log.error('[process_segment] [{}] {}'.format(station, err))
|
|
|
return
|
|
|
|
|
|
# 1.1 Calcular el número de segmentos requeridos
|
|
|
# de acuerdo a la duración total del audio.
|
|
|
try:
|
|
|
audio = mutagen.mp3.MP3(filename)
|
|
|
- segments_needed = int(round(float(audio.info.length) / float(5)))
|
|
|
- segments_needed = int(round(
|
|
|
- segments_needed * SEGMENTS_TOLERANCE_RATE
|
|
|
- ))
|
|
|
+ audio_length = audio.info.length
|
|
|
+
|
|
|
+ if segment_size == 'integer':
|
|
|
+ segment_size = int(audio_length)
|
|
|
+ elif segment_size == 'ceil':
|
|
|
+ segment_size = int(math.ceil(audio_length / 5)) * 5
|
|
|
+
|
|
|
+ segments_needed = int(round(float(audio_length) / float(segment_size)))
|
|
|
+ segments_needed = int(round(segments_needed * tolerance))
|
|
|
+
|
|
|
except Exception as ex:
|
|
|
- log.error('file {} is not an mp3'.format(filename))
|
|
|
+ log.error('[process_segment] file {} is not an mp3'.format(filename))
|
|
|
log.error(str(ex))
|
|
|
return
|
|
|
|
|
|
try:
|
|
|
dejavu.fingerprint_file(filename)
|
|
|
except Exception as ex:
|
|
|
- log.error('cannot fingerprint: {}'.format(ex))
|
|
|
+ log.error('[process_segment] cannot fingerprint: {}'.format(ex))
|
|
|
|
|
|
""" Hay dos posibles escensarios al obtener los audios
|
|
|
a. Los audios vienen por el parámetro "audios" de la
|
|
@@ -206,8 +245,12 @@ def process_segment(item, audios=None):
|
|
|
b. Los audios se obtienen directamente de la base
|
|
|
de datos en modo de cursor.
|
|
|
"""
|
|
|
- audios_iterable = audios if audios \
|
|
|
- else iterate_audios(date, station)
|
|
|
+ try:
|
|
|
+ audios_iterable = audios if audios \
|
|
|
+ else iterate_audios(date, station, calibration=calibration)
|
|
|
+ except sqlite3.OperationalError as err:
|
|
|
+ log.error('[process_segment] [{}] {}'.format(station, err))
|
|
|
+ return
|
|
|
|
|
|
# 2. Read the list of files from local database
|
|
|
audios_counter = 0
|
|
@@ -215,10 +258,16 @@ def process_segment(item, audios=None):
|
|
|
|
|
|
try:
|
|
|
for path, name, ts in audios_iterable:
|
|
|
- log.info('file: {}'.format(path))
|
|
|
+ short_path = os.path.join(station, name)
|
|
|
audios_counter += os.path.isfile(path)
|
|
|
values = []
|
|
|
- for match in dejavu.recognize(recognizer, path, 5,
|
|
|
+
|
|
|
+ if not os.path.isfile(path):
|
|
|
+ log.error('[process_segment] file not found: {}'\
|
|
|
+ .format(short_path))
|
|
|
+ continue
|
|
|
+
|
|
|
+ for match in dejavu.recognize(recognizer, path, segment_size,
|
|
|
ads_filter=[md5hash]):
|
|
|
results.append({
|
|
|
'confidence': match['confidence'],
|
|
@@ -229,7 +278,7 @@ def process_segment(item, audios=None):
|
|
|
|
|
|
ts += match['length'] / 1000
|
|
|
|
|
|
- log.info('{2} ({3}/{0}) {1}'.format(
|
|
|
+ log.info('[process_segment] [{3}] {2} {0}) {1}'.format(
|
|
|
os.path.split(path)[-1],
|
|
|
','.join(values),
|
|
|
item['ad'],
|
|
@@ -245,27 +294,29 @@ def process_segment(item, audios=None):
|
|
|
item['id'],
|
|
|
None, # TODO: send results again
|
|
|
found=find_repetitions(results,
|
|
|
- segments_needed=segments_needed
|
|
|
+ segments_needed=segments_needed,
|
|
|
+ calibration=calibration,
|
|
|
),
|
|
|
missing_files=(12 - audios_counter) \
|
|
|
if audios_counter < 12 else 0
|
|
|
)
|
|
|
- log.info('API response: {}'.format(response))
|
|
|
+ log.info('[{}] API response: {}'.format(station, response))
|
|
|
except ConnectionError as ex:
|
|
|
- log.error(str(ex))
|
|
|
+ log.error('[process_segment] {}'.format(str(ex)))
|
|
|
except UserWarning as warn:
|
|
|
log.warning(str(warn))
|
|
|
|
|
|
-def find_repetitions(results, segments_needed=2):
|
|
|
+def find_repetitions(results, segments_needed=2, calibration=None):
|
|
|
found_counter = 0
|
|
|
found_index = None
|
|
|
expect_space = False
|
|
|
expect_recover = False
|
|
|
last_value_in_threshold_index = -1
|
|
|
+ fall_tolerance = calibration['fallTolerance']
|
|
|
found = []
|
|
|
|
|
|
if threshold_mode == THRESHOLD_FIXED:
|
|
|
- threshold = THRESHOLD
|
|
|
+ threshold = calibration['threshold']
|
|
|
elif threshold_mode == THRESHOLD_AVERAGE:
|
|
|
values = [x['confidence'] for x in results]
|
|
|
threshold = math.ceil(float(sum(values)) / float(len(values)))
|
|
@@ -283,7 +334,7 @@ def find_repetitions(results, segments_needed=2):
|
|
|
if expect_recover:
|
|
|
expect_recover = False
|
|
|
|
|
|
- elif FALL_TOLERANCE_SEGMENTS:
|
|
|
+ elif fall_tolerance:
|
|
|
if not expect_recover:
|
|
|
if last_value_in_threshold_index != -1:
|
|
|
""" Solo cuando ya haya entrado por lo menos
|
|
@@ -316,14 +367,23 @@ def find_repetitions(results, segments_needed=2):
|
|
|
|
|
|
return found
|
|
|
|
|
|
-def iterate_audios(dt, station):
|
|
|
+def iterate_audios(dt, station, calibration=None):
|
|
|
""" Given a datetime object and an station,
|
|
|
iterate a list of files that are between
|
|
|
the the date and itself plus 5 minutes;
|
|
|
station must match too """
|
|
|
- from_time = time.mktime(dt.timetuple()) \
|
|
|
- - AHEAD_TIME_AUDIO_TOLERANCE
|
|
|
- to_time = from_time + 3599 + AHEAD_TIME_AUDIO_TOLERANCE
|
|
|
+
|
|
|
+ tm = time.mktime(dt.timetuple())
|
|
|
+
|
|
|
+ if calibration and calibration['hourlyOffset']:
|
|
|
+ hoffset = calibration['hourlyOffset']
|
|
|
+ from_time = tm + hoffset
|
|
|
+ to_time = tm + 3599 + hoffset
|
|
|
+
|
|
|
+ elif AHEAD_TIME_AUDIO_TOLERANCE:
|
|
|
+ """ Conventional mode """
|
|
|
+ from_time = tm + AHEAD_TIME_AUDIO_TOLERANCE
|
|
|
+ to_time = from_time + 3599 + AHEAD_TIME_AUDIO_TOLERANCE
|
|
|
|
|
|
log.info('from {} to {}'.format(int(from_time), int(to_time)))
|
|
|
|