{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Ad matching for the next pending job\n",
    "\n",
    "Downloads the ads and media of the next pending job, fingerprints the media with Dejavu and looks up every ad in it."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# __future__ imports must be the first statement of the cell, otherwise it fails to compile.\n",
    "from __future__ import print_function, absolute_import\n",
    "\n",
    "# Silence UserWarning before the remaining imports run.\n",
    "import warnings\n",
    "warnings.simplefilter(\"ignore\", UserWarning)\n",
    "\n",
    "import json\n",
    "import logging as log\n",
    "import math\n",
    "import os\n",
    "import time\n",
    "from multiprocessing import Process\n",
    "from queue import Queue, Empty\n",
    "from urllib.parse import quote\n",
    "\n",
    "import dateutil\n",
    "import requests\n",
    "from tornado.ioloop import IOLoop\n",
    "\n",
    "from boxconfig import parse_config\n",
    "from client import Client, ConnectionError\n",
    "from dejavu import Dejavu, CouldntDecodeError\n",
    "from dejavu.recognize import FilePerSecondRecognizer\n",
    "from endpoint import setup_endpoint"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Root directory for every downloaded file.\n",
    "PATH = '/tmp'\n",
    "# Seconds to wait on each HTTP request before giving up.\n",
    "REQUEST_TIMEOUT = 30\n",
    "# Third positional argument of dejavu.recognize().\n",
    "# NOTE(review): presumably the number of seconds per analysed segment - confirm.\n",
    "RECOGNIZER_OPTION = 5\n",
    "# Matches whose \"confidence\" is below this value are discarded.\n",
    "MIN_CONFIDENCE = 50\n",
    "\n",
    "config = parse_config()\n",
    "queue = Queue()\n",
    "recognizer = FilePerSecondRecognizer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def obt_siguiente_trabajo():\n",
    "    \"\"\"Fetch the next pending job for this device and return the decoded JSON body.\n",
    "\n",
    "    The device id is read from ``config['device_id']``.\n",
    "    \"\"\"\n",
    "    url = 'https://api.fourier.audio/na/calendario/pendiente?id=%s' % (config['device_id'],)\n",
    "    response = requests.get(url, timeout=REQUEST_TIMEOUT)\n",
    "    return response.json()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def _guardar_atomico(destino, contenido):\n",
    "    \"\"\"Write ``contenido`` (bytes) to ``destino`` without ever leaving a partial file there.\"\"\"\n",
    "    temporal = destino + '.part'\n",
    "    with open(temporal, \"wb\") as fp:\n",
    "        fp.write(contenido)\n",
    "    # The file only appears under its final name once it is complete, so an\n",
    "    # interrupted write is never mistaken for a cached download later on.\n",
    "    os.replace(temporal, destino)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def descargar_anuncio(ad_path):\n",
    "    \"\"\"Download an ad from the configured bucket into ``PATH/ads``.\n",
    "\n",
    "    Returns the local path (reusing a previously downloaded file if present),\n",
    "    or ``None`` when the server does not answer with HTTP 200.\n",
    "    \"\"\"\n",
    "    directorio = os.path.join(PATH, 'ads')\n",
    "    os.makedirs(directorio, exist_ok=True)\n",
    "    ruta_anuncio = os.path.join(directorio, os.path.basename(ad_path))\n",
    "    if os.path.isfile(ruta_anuncio):\n",
    "        return ruta_anuncio\n",
    "\n",
    "    url = 'https://storage.googleapis.com/{}/{}'.format(config['bucket'], ad_path)\n",
    "    response = requests.get(url, timeout=REQUEST_TIMEOUT)\n",
    "\n",
    "    # TODO: raise an alert when the response is not 200\n",
    "    if response.status_code != 200:\n",
    "        print(\"[Anuncio][error] %s\" % (response.text))\n",
    "        return None\n",
    "\n",
    "    _guardar_atomico(ruta_anuncio, response.content)\n",
    "    return ruta_anuncio"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def descargar_media(box, station, media):\n",
    "    \"\"\"Download a media file from Firebase Storage into ``PATH/fourier/<box>/<station>``.\n",
    "\n",
    "    Returns the local path (reusing a previously downloaded file if present),\n",
    "    or ``None`` when the server does not answer with HTTP 200.\n",
    "    \"\"\"\n",
    "    ref = '{}/{}/{}'.format(box, station, media)\n",
    "    directorio = os.path.join(PATH, 'fourier', box, station)\n",
    "    os.makedirs(directorio, exist_ok=True)\n",
    "    out_file = os.path.join(directorio, os.path.basename(ref))\n",
    "    if os.path.isfile(out_file):\n",
    "        return out_file\n",
    "\n",
    "    # The object name travels as a single URL path segment, so every reserved\n",
    "    # character (not only \"/\" and \"+\") has to be percent-encoded.\n",
    "    firebase_base_url = 'https://firebasestorage.googleapis.com/v0/b/fourier-6e14d.appspot.com/o'\n",
    "    url = '{}/{}?alt=media'.format(firebase_base_url, quote(ref, safe=''))\n",
    "    response = requests.get(url, timeout=REQUEST_TIMEOUT)\n",
    "\n",
    "    if response.status_code != 200:\n",
    "        print(\"[Media][url] %s\" % (url,))\n",
    "        print(\"[Media][error] %s\" % (response.text))\n",
    "        return None\n",
    "\n",
    "    _guardar_atomico(out_file, response.content)\n",
    "    return out_file"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def enviar_resultados(trabajo, dry_run=True):\n",
    "    \"\"\"Print the job and, unless ``dry_run`` is set, POST it to the results endpoint.\n",
    "\n",
    "    ``dry_run`` defaults to True, in which case nothing is sent and ``None`` is\n",
    "    returned. With ``dry_run=False`` the ``requests`` response object is returned.\n",
    "    \"\"\"\n",
    "    print('[Pendiente] %s' % (json.dumps(trabajo),))\n",
    "    if dry_run:\n",
    "        return None\n",
    "\n",
    "    url = 'https://api.fourier.audio/v1/calendario/resultado'\n",
    "    response = requests.post(url, json=trabajo, timeout=REQUEST_TIMEOUT)\n",
    "    print('[Response] %s' % (response.text))\n",
    "    return response"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Aquí comienza la comparación de archivos"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "trabajo = obt_siguiente_trabajo()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "pendiente = trabajo"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "ciudad = pendiente['origen']\n",
    "estacion = pendiente['estacion']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Defined before the loop so later cells always find these names, even if a download fails.\n",
    "anuncios = []\n",
    "id_by_ad = {}\n",
    "item_ids = []\n",
    "for elemento in pendiente.get(\"elementos\", []):\n",
    "    # Errors are handled per element: one failing ad must not stop the remaining ones.\n",
    "    try:\n",
    "        id_by_ad[elemento['anuncio']] = elemento['id']\n",
    "        if elemento['id'] not in item_ids:\n",
    "            item_ids.append(elemento['id'])\n",
    "\n",
    "        anuncio = descargar_anuncio(elemento[\"ruta\"])\n",
    "        if anuncio is not None:\n",
    "            print(\"Listo %s\" % (elemento['ruta'],))\n",
    "            anuncios.append(anuncio)\n",
    "    except Exception as err:\n",
    "        print('[process_segment] [{}] {}'.format(estacion, err))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Each entry is (local path, fecha, timestamp) of a successfully downloaded media file.\n",
    "media = []\n",
    "for elemento in pendiente.get(\"media\", []):\n",
    "    # Errors are handled per element so the remaining files are still downloaded.\n",
    "    try:\n",
    "        archivo = descargar_media(ciudad, estacion, elemento[\"ruta\"])\n",
    "        if archivo is not None:\n",
    "            print(\"Listo %s %s %s\" % (ciudad, estacion, elemento['ruta'],))\n",
    "            media.append((archivo, elemento[\"fecha\"], elemento[\"timestamp\"]))\n",
    "    except Exception as err:\n",
    "        print(err)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "dejavu = Dejavu({\"database_type\": \"mem\"})\n",
    "for ruta, _fecha, ts in media:\n",
    "    # The timestamp is passed as the second argument of fingerprint_file.\n",
    "    # A file that cannot be fingerprinted is reported and skipped.\n",
    "    try:\n",
    "        dejavu.fingerprint_file(ruta, ts)\n",
    "    except Exception as ex:\n",
    "        print(ex)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Maps element id -> aggregated match; \"longitud\" and \"confianza\" are summed over all matches.\n",
    "resultados = {}\n",
    "for anuncio in anuncios:\n",
    "    nombre_anuncio = os.path.basename(anuncio)\n",
    "    if nombre_anuncio not in id_by_ad:\n",
    "        # Without an id the match could not be stored, so skip the ad entirely.\n",
    "        print(\"[Resultados][error] no id registered for %s\" % (nombre_anuncio,))\n",
    "        continue\n",
    "    id_anuncio = id_by_ad[nombre_anuncio]\n",
    "\n",
    "    for match in dejavu.recognize(recognizer, anuncio, RECOGNIZER_OPTION):\n",
    "        if \"id\" not in match or match[\"confidence\"] < MIN_CONFIDENCE:\n",
    "            continue\n",
    "\n",
    "        match[\"match_time\"] = None\n",
    "        encontrado = {\n",
    "            \"id\": id_anuncio,\n",
    "            \"anuncio\": anuncio,\n",
    "            # NOTE(review): \"name\" is presumably the timestamp given to\n",
    "            # fingerprint_file above - confirm against the Dejavu fork in use.\n",
    "            \"fecha\": match[\"name\"],\n",
    "            \"timestamp\": match[\"name\"] + int(match['offset_seconds']),\n",
    "            \"confianza\": match[\"confidence\"],\n",
    "            \"longitud\": match[\"length\"],\n",
    "            \"desfase_segundos\": match[\"offset_seconds\"]\n",
    "        }\n",
    "        print(encontrado)\n",
    "\n",
    "        if id_anuncio in resultados:\n",
    "            resultados[id_anuncio][\"longitud\"] += encontrado[\"longitud\"]\n",
    "            resultados[id_anuncio][\"confianza\"] += encontrado[\"confianza\"]\n",
    "        else:\n",
    "            resultados[id_anuncio] = encontrado"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "json.dumps(resultados)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Attach every aggregated match to the pending element it belongs to.\n",
    "for encontrado in resultados.values():\n",
    "    nombre_anuncio = os.path.basename(encontrado['anuncio'])\n",
    "    for elemento in pendiente['elementos']:\n",
    "        if elemento['id'] == encontrado['id'] and elemento['anuncio'] == nombre_anuncio:\n",
    "            encontrados = elemento.setdefault('encontrados', [])\n",
    "            # Re-running this cell must not attach the same match twice.\n",
    "            if encontrado not in encontrados:\n",
    "                encontrados.append(encontrado)\n",
    "            print(encontrado)\n",
    "            break"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "json.dumps(pendiente)"
   ]
  }
 ],
 "metadata": {
  "interpreter": {
   "hash": "631ec0267e76ead327ae18a3cdf21f6916cbb309615a11f42bd594f9973a79cd"
  },
  "kernelspec": {
   "display_name": "Python 3.9.7 64-bit ('venv': venv)",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}