Browse Source

If a file has already been downloaded, it will not be downloaded again while the service is running; the in-memory cache lasts only until the service is restarted

Gamaliel Espinoza 7 years ago
parent
commit
5de96c62ad
1 changed file with 22 additions and 2 deletions
  1. 22 2
      ondemand/service.py

+ 22 - 2
ondemand/service.py

@@ -10,6 +10,8 @@ from fourier.dejavu import Dejavu
 from Queue import Queue, Empty
 from Queue import Queue, Empty
 from firebase_admin import credentials
 from firebase_admin import credentials
 from firebase_admin import db as fbdb
 from firebase_admin import db as fbdb
+from binascii import hexlify
+from base64 import b64decode
 import firebase_admin
 import firebase_admin
 import requests
 import requests
 import dateutil
 import dateutil
@@ -36,6 +38,7 @@ recognizer = FilePerSecondRecognizer
 
 
 db_path = os.path.join(device_path, 'files.db')
 db_path = os.path.join(device_path, 'files.db')
 db = sqlite3.connect(db_path)
 db = sqlite3.connect(db_path)
+cloud_cache = {}
 
 
 def feed_queue():
 def feed_queue():
     """ Search for pending scheduled work in
     """ Search for pending scheduled work in
@@ -82,7 +85,7 @@ def process_segment(item):
 
 
     # 1. obtener el audio desde firebase
     # 1. obtener el audio desde firebase
     #    y calcular su fingerprint.
     #    y calcular su fingerprint.
-    filename = cloud_download(ad_key=item['ad'])
+    filename, md5hash = cloud_download(ad_key=item['ad'])
     if not filename:
     if not filename:
         print('ad file missing', file=sys.stderr)
         print('ad file missing', file=sys.stderr)
         return
         return
@@ -188,6 +191,15 @@ def iterate_audios(dt, station):
 def cloud_download(ad_key=None):
 def cloud_download(ad_key=None):
     """ Given an ad key, the file is downloaded to
     """ Given an ad key, the file is downloaded to
     the system temporal folder to be processed """
     the system temporal folder to be processed """
+
+    if ad_key in cloud_cache:
+        """ If this file has already been downloaded,
+        will not be downloaded again, instead will
+        be taken from cloud_cache dictionary """
+        filename, md5hash = cloud_cache[ad_key]
+        if os.path.isfile(filename):
+            return filename, md5hash
+
     ad = fbdb.reference('ads/{}'.format(ad_key)).get()
     ad = fbdb.reference('ads/{}'.format(ad_key)).get()
     filename = os.path.basename(ad['path'])
     filename = os.path.basename(ad['path'])
     out_file = os.path.join(AUDIOS_PATH, filename)
     out_file = os.path.join(AUDIOS_PATH, filename)
@@ -195,9 +207,17 @@ def cloud_download(ad_key=None):
     response = requests.get(url)
     response = requests.get(url)
     
     
     if response.status_code == 200:
     if response.status_code == 200:
+        hashes = response.headers['x-goog-hash']
+        hashes = hashes.split(',')
+        hashes = [h.split('=', 1) for h in hashes]
+        hashes = {h[0].strip(): hexlify(b64decode(h[1])) for h in hashes}
+        md5sum = hashes['md5']
+
         with open(out_file, "wb") as fp:
         with open(out_file, "wb") as fp:
             fp.write(response.content)
             fp.write(response.content)
-            return out_file
+            tp = (out_file, md5sum)
+            cloud_cache[ad_key] = tp
+            return tp
 
 
 app = Application()
 app = Application()
 loop = IOLoop.current()
 loop = IOLoop.current()