| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258 | # -*- coding: utf8 -*-from __future__ import print_function, absolute_importfrom tornado.ioloop import IOLoopfrom client import Client, ConnectionErrorfrom boxconfig import parse_configfrom dejavu.recognize import FilePerSecondRecognizerfrom endpoint import setup_endpointfrom calibration import Calibrationsfrom dejavu import Dejavu, CouldntDecodeErrorfrom multiprocessing import Processimport logging as logimport mutagen.mp3import mathimport sysimport osfrom datetime import datetimeimport streamlit as stif sys.version_info >= (3, 0):    from queue import Queue, Emptyelse:    from Queue import Queue, Emptylog.basicConfig(format='[%(asctime)s] [%(module)s] %(message)s', level=log.INFO)AUDIOS_PATH = '/tmp'AHEAD_TIME_AUDIO_TOLERANCE = 2  # secondMAX_SEGMENT_THREADS = 4THRESHOLD = 10SEGMENTS_TOLERANCE_RATE = 0.6FALL_TOLERANCE_SEGMENTS = 1# THRESHOLDTHRESHOLD_FIXED = 1THRESHOLD_AVERAGE = 2# Modos de procesamiento de queue#  - QUEQUE_SINGLE: procesa solo un segmento a la vez#  - QUEUE_THREAD:  inicia un hilo para cada segmento# Por default se usará el threaded.# TODO: hacerlo configurable por medio de argumentos#       de ejecución.QUEUE_SINGLE = 1QUEUE_THREAD = 2# Se pueden usar diferentes API'se# la de threading y la de multiprocessing.MultiAPI = Processconfig = parse_config()queue = Queue()client = Client(config['device_id'],                config['apiSecret'])cloud_base_url = 'https://storage.googleapis.com/{}' \    .format(config['bucket'])base_path = config.get("basepath", "/var/fourier")device_id = config['device_id']device_path = os.path.join(base_path, device_id)recognizer = FilePerSecondRecognizer# settingsqueue_mode = QUEUE_SINGLEthreshold_mode = THRESHOLD_FIXEDdb_path = config.get('localDatabase', os.path.join(device_path, 'files.db'))#db = sqlite3.connect(db_path)cloud_cache = {}def process_segment(anuncios, grabaciones, audios=None, calibration=None):    """ Procesa una hora de audio """    # print(anuncio +" y "+ audio_busqueda)    # date = dateutil.parser.parse(item['fecha'], ignoretz=True)    segment_size = 5    audio_length = 0    anuncios = ["anuncio"]    dejavu = Dejavu({"database_type": "mem"})    try:        for i in range(0, len(anuncios)):            dejavu.fingerprint_file("anuncios/audio-{}.mp3".format(i,))    except Exception as ex:        log.error('[process_segment] cannot fingerprint: {}'.format(ex))    audios_counter = 0    results = []    v = []    for i in range(0, len(grabaciones)):        path = "grabaciones/audio-{}.mp3".format(i,)        values = []        try:            for match in dejavu.recognize(recognizer, path, segment_size):                name = path                results.append({                    'confidence': match['confidence'],                    'offset': match['offset'],                    'name': name                })                values.append(str(match['confidence']))            v.append(','.join(values))            log.info('[process_segment] {0} {1}'.format(                os.path.split(path)[-1],                ','.join(values),            ))            st.text('[process_segment] {0} {1}'.format(                os.path.split(path)[-1],                ','.join(values),            ))        except CouldntDecodeError as ex:            log.error('[process_segment] {}'.format(ex))    try:        encontrados = {}        item_ids = []        for i in item_ids:            r = [result for result in results if result["name"] == i]            encontrados[i] = find_repetitions(r, segments_needed=segments_needed, calibration=calibration,)        #for id in encontrados:r        #    for e in encontrados[id]:        #        for i in item['elementos']:        #            if i['id'] == id and i['anuncio'] == e['ad']:        #                if 'encontrados' not in i:        #                    i['encontrados'] = []        #                i['encontrados'].append(e)        #                break        #item["archivos_perdidos"] = (12 - audios_counter) if audios_counter < 12 else 0    except ConnectionError as ex:        log.error('[process_segment] {}'.format(str(ex)))    except UserWarning as warn:        log.warning(str(warn))def find_repetitions(results, segments_needed=2, calibration=None):    found_counter = 0    found_down_counter = 0    found_index = None    expect_space = False    expect_recover = False    last_value_in_threshold_index = -1    fall_tolerance = calibration['fallTolerance']    found = []    high = 100 # Obtener este valor desde un parámetro    middle_high = 50 # Obtener este valor desde un parámetro    segment_middle_needed = 2 # Obtener este valor desde un parámetro    found_high = None    found_middle_high = []    if threshold_mode == THRESHOLD_FIXED:        threshold = calibration['threshold']    elif threshold_mode == THRESHOLD_AVERAGE:        values = [x['confidence'] for x in results]        threshold = math.ceil(float(sum(values)) / float(len(values)))    if segments_needed < 1:        segments_needed = 1    for index, result in enumerate(results):        #if result['confidence'] >= high:        #    if found_high is None:        #        found_high = index        #    elif result['confidence'] > results[found_high]['confidence']:        #        found_high = index        #elif result['confidence'] >= middle_high:        #    found_middle_high.append(index)        if not expect_space:            if result['confidence'] >= threshold:                found_counter += 1                last_value_in_threshold_index = index                if found_index is None:                    found_index = index                if expect_recover:                    found_counter += found_down_counter                    expect_recover = False            elif fall_tolerance:                if not expect_recover:                    if last_value_in_threshold_index != -1:                        """ Solo cuando ya haya entrado por lo menos                        un valor en el rango del threshold, es cuando                        se podrá esperar un valor bajo """                        expect_recover = True                        found_down_counter += 1                    else:                        pass                else:                    """ Si después de haber pasado tolerado 1 elemento                    vuelve a salir otro fuera del threshold continuo,                    entonces ya se da por perdido """                    found_counter = 0                    found_down_counter = 0                    found_index = None                    expect_recover = False            else:                found_counter = 0                found_down_counter = 0                found_index = None                expect_recover = False                # Aquí veremos si hay un valor alto                #if found_high is not None:                #    found_row = results[found_high]                #    found.append(found_row)                #elif len(found_middle_high) >= segment_middle_needed:                #    found_row = results[found_middle_high[0]]                #    found.append(found_row)                #found_high = None                #found_middle_high = []        else:            if result['confidence'] <= threshold:                expect_space = False        if found_counter >= segments_needed:            found_row = results[found_index]            found.append(found_row)            found_counter = 0            expect_space = True            #found_high = None            #found_middle_high = []    return founddef limpiar_archivos():    anuncios = os.listdir('anuncios/')    for audio in anuncios:        os.remove('anuncios/{}'.format(audio,))    grabaciones = os.listdir('grabaciones/')    for audio in grabaciones:        os.remove('grabaciones/{}'.format(audio))def main():    if st.button("Limpiar archivos"):        limpiar_archivos()    anuncios = st.file_uploader("Elige los anuncios", accept_multiple_files=True, type="mp3")    for i in range(0, len(anuncios)):        with open("anuncios/audio-{}.mp3".format(i,), "wb") as audio:            audio.write(anuncios[i].getvalue())    grabaciones = st.file_uploader("Elige la grabación", accept_multiple_files=True, type="mp3")    for i in range(0, len(grabaciones)):        with open("grabaciones/audio-{}.mp3".format(i,), "wb") as audio:            audio.write(grabaciones[i].getvalue())    if st.button("Comparar"):        process_segment(anuncios, grabaciones)main()
 |