Sfoglia il codice sorgente

Comparación de múltiples archivos

Hugo 4 anni fa
parent
commit
e86195bc5e
3 file modificati con 54 aggiunte e 58 eliminazioni
  1. 3 3
      Dockerfile
  2. 10 0
      docker-compose.yaml
  3. 41 55
      ondemand/service_streamlit.py

+ 3 - 3
Dockerfile

@@ -1,10 +1,10 @@
-FROM python:2.7.18-slim-buster
+FROM python:3.8.8-slim-buster
 RUN apt-get update -y
 RUN apt-get install -y gcc portaudio19-dev python-setuptools ffmpeg tk
 RUN python -m pip install --upgrade pip
-RUN pip install cryptography numpy pydub matplotlib scipy tornado requests pyaudio firebase_admin psutil mutagen streamlit
+RUN pip install cryptography numpy pydub matplotlib scipy tornado requests pyaudio firebase_admin psutil mutagen streamlit==0.78
 ADD . /code/
 WORKDIR /code/ondemand/
 EXPOSE 8501
-ENTRYPOINT [ "streamlit", "run" ,"service1.py"]
+ENTRYPOINT [ "streamlit", "run" ,"service_streamlit.py"]
 #RUN python setup.py install

+ 10 - 0
docker-compose.yaml

@@ -0,0 +1,10 @@
+version: '3'
+services:
+  server:
+    build:
+      context: .
+      dockerfile: Dockerfile
+    volumes:
+      - .:/code
+    ports:
+      - "8501:8501"

+ 41 - 55
ondemand/service_streamlit.py

@@ -66,73 +66,45 @@ db_path = config.get('localDatabase', os.path.join(device_path, 'files.db'))
 #db = sqlite3.connect(db_path)
 cloud_cache = {}
 
-def process_segment(anuncio, audio_busqueda, audios=None, calibration=None):
+def process_segment(anuncios, grabaciones, audios=None, calibration=None):
     """ Procesa una hora de audio """
-    print(anuncio +" y "+ audio_busqueda)
-    #date = dateutil.parser.parse(item['fecha'], ignoretz=True)
+    # print(anuncio +" y "+ audio_busqueda)
+    # date = dateutil.parser.parse(item['fecha'], ignoretz=True)
     segment_size = 5
     audio_length = 0
 
-
-    # 1.1 Calcular el número de segmentos requeridos
-    # de acuerdo a la duración total del audio.
-    try:
-        #filename = "/tmp/anuncios/RDF2112020ORIGINAL.MP3"
-        filename = anuncio
-        audio = mutagen.mp3.MP3(filename)
-        audio_length = audio.info.length
-
-        if segment_size == 'integer':
-            segment_size = int(audio_length)
-        elif segment_size == 'ceil':
-            segment_size = int(math.ceil(audio_length / 5)) * 5
-
-        segments_needed = int(round(float(audio_length) / float(segment_size)))
-        segments_needed = int(round(segments_needed * 0.8))
-
-    except Exception as ex:
-        #log.error('[process_segment] file {} is not an mp3'.format(filename))
-        log.error(str(ex))
-        return
-
+    anuncios = ["anuncio"]
     dejavu = Dejavu({"database_type": "mem"})
-    try:    
-        dejavu.fingerprint_file(filename)
+    try:
+        for i in range(0, len(anuncios)):
+            dejavu.fingerprint_file("anuncios/audio-{}.mp3".format(i,))
     except Exception as ex:
         log.error('[process_segment] cannot fingerprint: {}'.format(ex))
 
-    # 2. Read the list of files from local database
     audios_counter = 0
     results = []
     v = []
-    #audios_iterable = [("/tmp/anuncios/RDF2112020GDL115271020NOESTAENREPORTEPEROESCORRECTO.mp3", "RDF2112020GDL115271020NOESTAENREPORTEPEROESCORRECTO.mp3", 1000)]
-    audios_iterable = [(audio_busqueda, audio_busqueda, 1000)]
-    for path, name, ts in audios_iterable:
-        audios_counter += os.path.isfile(path)
-        values = []
 
+    for i in range(0, len(grabaciones)):
+        path = "grabaciones/audio-{}.mp3".format(i,)
+        values = []
         try:
             for match in dejavu.recognize(recognizer, path, segment_size):
-                name = None
-                ad = None
+                name = path
 
                 results.append({
-                    'ad': ad,
                     'confidence': match['confidence'],
-                    'timestamp': ts,
                     'offset': match['offset'],
                     'name': name
                 })
                 values.append(str(match['confidence']))
 
-                ts += match['length'] / 1000
-
             v.append(','.join(values))
-            log.info('[process_segment] {0}) {1}'.format(
+            log.info('[process_segment] {0} {1}'.format(
                 os.path.split(path)[-1],
                 ','.join(values),
             ))
-            st.text('[process_segment] {0}) {1}'.format(
+            st.text('[process_segment] {0} {1}'.format(
                 os.path.split(path)[-1],
                 ','.join(values),
             ))
@@ -147,7 +119,7 @@ def process_segment(anuncio, audio_busqueda, audios=None, calibration=None):
             r = [result for result in results if result["name"] == i]
             encontrados[i] = find_repetitions(r, segments_needed=segments_needed, calibration=calibration,)
 
-        #for id in encontrados:
+        #for id in encontrados:r
         #    for e in encontrados[id]:
         #        for i in item['elementos']:
         #            if i['id'] == id and i['anuncio'] == e['ad']:
@@ -255,18 +227,32 @@ def find_repetitions(results, segments_needed=2, calibration=None):
 
     return found
 
-def WebService():
-    anuncio_file = st.file_uploader("Selecciona el anuncio")
-    if anuncio_file is not None:
-        name = "anuncio"
-        with open(name, "wb") as anuncio:
-            anuncio.write(anuncio_file.getvalue())
-    
-    audio_file = st.file_uploader("Selecciona el audio")
-    if audio_file is not None:
-        with open("audio", "wb") as audio:
-            audio.write(audio_file.getvalue())
+
+def limpiar_archivos():
+    anuncios = os.listdir('anuncios/')
+    for audio in anuncios:
+        os.remove('anuncios/{}'.format(audio,))
+
+    grabaciones = os.listdir('grabaciones/')
+    for audio in grabaciones:
+        os.remove('grabaciones/{}'.format(audio))
+
+
+def main():
+    if st.button("Limpiar archivos"):
+        limpiar_archivos()
+
+    anuncios = st.file_uploader("Elige los anuncios", accept_multiple_files=True, type="mp3")
+    for i in range(0, len(anuncios)):
+        with open("anuncios/audio-{}.mp3".format(i,), "wb") as audio:
+            audio.write(anuncios[i].getvalue())
+
+    grabaciones = st.file_uploader("Elige la grabación", accept_multiple_files=True, type="mp3")
+    for i in range(0, len(grabaciones)):
+        with open("grabaciones/audio-{}.mp3".format(i,), "wb") as audio:
+            audio.write(grabaciones[i].getvalue())
+
     if st.button("Comparar"):
-        process_segment("anuncio", "audio")
+        process_segment(anuncios, grabaciones)
 
-WebService()
+main()