Browse Source

If a file has already been downloaded, it will not be downloaded again while the process is running; the cache lasts until the service is restarted

Gamaliel Espinoza 7 years ago
parent
commit
5de96c62ad
1 changed file with 22 additions and 2 deletions
  1. 22 2
      ondemand/service.py

+ 22 - 2
ondemand/service.py

@@ -10,6 +10,8 @@ from fourier.dejavu import Dejavu
 from Queue import Queue, Empty
 from firebase_admin import credentials
 from firebase_admin import db as fbdb
+from binascii import hexlify
+from base64 import b64decode
 import firebase_admin
 import requests
 import dateutil
@@ -36,6 +38,7 @@ recognizer = FilePerSecondRecognizer
 
 db_path = os.path.join(device_path, 'files.db')
 db = sqlite3.connect(db_path)
+cloud_cache = {}
 
 def feed_queue():
     """ Search for pending scheduled work in
@@ -82,7 +85,7 @@ def process_segment(item):
 
     # 1. obtener el audio desde firebase
     #    y calcular su fingerprint.
-    filename = cloud_download(ad_key=item['ad'])
+    filename, md5hash = cloud_download(ad_key=item['ad'])
     if not filename:
         print('ad file missing', file=sys.stderr)
         return
@@ -188,6 +191,15 @@ def iterate_audios(dt, station):
 def cloud_download(ad_key=None):
     """ Given an ad key, the file is downloaded to
     the system temporal folder to be processed """
+
+    if ad_key in cloud_cache:
+        """ If this file has already been downloaded,
+        will not be downloaded again, instead will
+        be taken from cloud_cache dictionary """
+        filename, md5hash = cloud_cache[ad_key]
+        if os.path.isfile(filename):
+            return filename, md5hash
+
     ad = fbdb.reference('ads/{}'.format(ad_key)).get()
     filename = os.path.basename(ad['path'])
     out_file = os.path.join(AUDIOS_PATH, filename)
@@ -195,9 +207,17 @@ def cloud_download(ad_key=None):
     response = requests.get(url)
     
     if response.status_code == 200:
+        hashes = response.headers['x-goog-hash']
+        hashes = hashes.split(',')
+        hashes = [h.split('=', 1) for h in hashes]
+        hashes = {h[0].strip(): hexlify(b64decode(h[1])) for h in hashes}
+        md5sum = hashes['md5']
+
         with open(out_file, "wb") as fp:
             fp.write(response.content)
-            return out_file
+            tp = (out_file, md5sum)
+            cloud_cache[ad_key] = tp
+            return tp
 
 app = Application()
 loop = IOLoop.current()