Browse Source

Fixed confidence count

Gamaliel Espinoza 7 years ago
parent
commit
b5d53aba95
2 changed files with 46 additions and 30 deletions
  1. 1 1
      ondemand/__init__.py
  2. 45 29
      ondemand/service.py

+ 1 - 1
ondemand/__init__.py

@@ -1 +1 @@
-__version__ = '1.1.0'
+__version__ = '1.1.1'

+ 45 - 29
ondemand/service.py

@@ -93,8 +93,8 @@ def process_segment(item):
         print('cannot fingerprint: {}'.format(ex))
 
     # 2. Read the list of files from local database
+    results = []
     for path, name, ts in iterate_audios(date, station):
-        results = []
         for match in dejavu.recognize(recognizer, path, 5):
             try:
                 results.append({
@@ -105,17 +105,44 @@ def process_segment(item):
             except KeyError as ex:
                 print(ex, file=sys.stderr)
 
-            ts += float(match["length"]) / 1000.0
-        try:
-            client.put_schedule_results(
-                item['schedule'],
-                item['id'],
-                results
-            )
-        except ConnectionError as ex:
-            print(ex, file=sys.stderr)
-        except UserWarning as warn:
-            print(str(warn), file=sys.stderr)
+    # 3. Look for success or failure
+    ts = 0
+    found_counter = 0
+    found_index = None
+    segments_needed = 2
+    seconds_needed = 9
+    threshold = 20
+    found = None
+
+    for index, result in enumerate(results):
+        if result['confidence'] > threshold:
+            found_counter += 1
+            if found_index is None:
+                found_index = index
+        else:
+            found_counter = 0
+            found_index = None
+
+        if found_counter >= segments_needed:
+            """ TODO: It will be necessary to improve this
+            further, so it can recognize more than one
+            audio in the same 1-hour segment. Also, the
+            seconds elapsed are important; a time
+            difference is needed """
+            found = results[found_index]
+            break
+    
+    try:
+        client.put_schedule_results(
+            item['schedule'],
+            item['id'],
+            results,
+            found=found['timestamp'] if found else None,
+        )
+    except ConnectionError as ex:
+        print(ex, file=sys.stderr)
+    except UserWarning as warn:
+        print(str(warn), file=sys.stderr)
 
 def iterate_audios(dt, station):
     """ Given a datetime object and a station,
@@ -127,7 +154,7 @@ def iterate_audios(dt, station):
 
     cursor = db.cursor()
     cursor.execute((
-        'select "filename" '
+        'select "filename", "timestamp" '
         'from "file" '
         'where "timestamp" between ? and ? '
             'and "station" = ? '
@@ -135,16 +162,13 @@ def iterate_audios(dt, station):
         ),   
         (from_time, to_time, station, ),
     )
-    files = []
-    for file in cursor:
-        files.append(file[0])
+    files = [file for file in cursor]
+    cursor.close()
 
-    for mp3path in files:
+    for mp3 in files:
+        mp3path, ts = mp3
         mp3name = os.path.basename(mp3path)
-        mp3date = parse_date_filename(mp3name)
-        if mp3date:
-            mp3ts = int(mp3date.strftime("%s"))
-            yield (mp3path, mp3name, mp3ts)
+        yield (mp3path, mp3name, ts)
 
 def cloud_download(ad_key=None):
     """ Given an ad key, the file is downloaded to
@@ -160,14 +184,6 @@ def cloud_download(ad_key=None):
             fp.write(response.content)
             return out_file
 
-def parse_date_filename(val):
-    try:
-        return datetime.strptime(val[:19],
-            '%Y-%m-%dT%H-%M-%S')
-    except ValueError:
-        return datetime.strptime(val[:19],
-            '%Y-%m-%d-%H-%M-%S')
-
 app = Application()
 loop = IOLoop.current()
 loop.add_callback(feed_queue)