fingerprint.py 6.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171
  1. import sys
  2. import numpy as np
  3. import matplotlib.mlab as mlab
  4. import matplotlib.pyplot as plt
  5. from scipy.ndimage.filters import maximum_filter
  6. from scipy.ndimage.morphology import (generate_binary_structure,
  7. iterate_structure, binary_erosion)
  8. import hashlib
  9. from operator import itemgetter
  10. PY3 = sys.version_info >= (3, 0)
  11. IDX_FREQ_I = 0
  12. IDX_TIME_J = 1
  13. ######################################################################
  14. # Sampling rate, related to the Nyquist conditions, which affects
  15. # the range frequencies we can detect.
  16. DEFAULT_FS = int(44100 / 2)
  17. ######################################################################
  18. # Size of the FFT window, affects frequency granularity
  19. DEFAULT_WINDOW_SIZE = 4096
  20. ######################################################################
  21. # Ratio by which each sequential window overlaps the last and the
  22. # next window. Higher overlap will allow a higher granularity of offset
  23. # matching, but potentially more fingerprints.
  24. DEFAULT_OVERLAP_RATIO = 0.4 #0.75 better accuracy much slower, default 0.5
  25. ######################################################################
  26. # Degree to which a fingerprint can be paired with its neighbors --
  27. # higher will cause more fingerprints, but potentially better accuracy.
  28. DEFAULT_FAN_VALUE = 15 #30 twice as many fingerprints, not much accuracy
  29. ######################################################################
  30. # Minimum amplitude in spectrogram in order to be considered a peak.
  31. # This can be raised to reduce number of fingerprints, but can negatively
  32. # affect accuracy.
  33. DEFAULT_AMP_MIN = 10 #/2 useless
  34. ######################################################################
  35. # Number of cells around an amplitude peak in the spectrogram in order
  36. # for Dejavu to consider it a spectral peak. Higher values mean less
  37. # fingerprints and faster matching, but can potentially affect accuracy.
  38. PEAK_NEIGHBORHOOD_SIZE = 10 #10 to augment fingerprints 3x and improve accuracy, default 20
  39. ######################################################################
  40. # Thresholds on how close or far fingerprints can be in time in order
  41. # to be paired as a fingerprint. If your max is too low, higher values of
  42. # DEFAULT_FAN_VALUE may not perform as expected.
  43. MIN_HASH_TIME_DELTA = 0
  44. MAX_HASH_TIME_DELTA = 200
  45. ######################################################################
  46. # If True, will sort peaks temporally for fingerprinting;
  47. # not sorting will cut down number of fingerprints, but potentially
  48. # affect performance.
  49. PEAK_SORT = True
  50. ######################################################################
  51. # Number of bits to throw away from the front of the SHA1 hash in the
  52. # fingerprint calculation. The more you throw away, the less storage, but
  53. # potentially higher collisions and misclassifications when identifying songs.
  54. FINGERPRINT_REDUCTION = 20 #1 for much less storage, leave at 20, don't change
  55. def fingerprint(channel_samples, Fs=DEFAULT_FS,
  56. wsize=DEFAULT_WINDOW_SIZE,
  57. wratio=DEFAULT_OVERLAP_RATIO,
  58. fan_value=DEFAULT_FAN_VALUE,
  59. amp_min=DEFAULT_AMP_MIN):
  60. """
  61. FFT the channel, log transform output, find local maxima, then return
  62. locally sensitive hashes.
  63. """
  64. # FFT the signal and extract frequency components
  65. arr2D = mlab.specgram(
  66. channel_samples,
  67. NFFT=wsize,
  68. Fs=Fs,
  69. window=mlab.window_hanning,
  70. noverlap=int(wsize * wratio))[0]
  71. # apply log transform since specgram() returns linear array
  72. arr2D = 10 * np.log10(arr2D)
  73. arr2D[arr2D == -np.inf] = 0 # replace infs with zeros
  74. # find local maxima
  75. local_maxima = get_2D_peaks(arr2D, plot=False, amp_min=amp_min)
  76. # return hashes
  77. return generate_hashes(local_maxima, fan_value=fan_value)
  78. def get_2D_peaks(arr2D, plot=False, amp_min=DEFAULT_AMP_MIN):
  79. # http://docs.scipy.org/doc/scipy/reference/generated/scipy.ndimage.morphology.iterate_structure.html#scipy.ndimage.morphology.iterate_structure
  80. struct = generate_binary_structure(2, 1)
  81. neighborhood = iterate_structure(struct, PEAK_NEIGHBORHOOD_SIZE)
  82. # find local maxima using our fliter shape
  83. local_max = maximum_filter(arr2D, footprint=neighborhood) == arr2D
  84. background = (arr2D == 0)
  85. eroded_background = binary_erosion(background, structure=neighborhood,
  86. border_value=1)
  87. # Boolean mask of arr2D with True at peaks
  88. detected_peaks = local_max ^ eroded_background
  89. # extract peaks
  90. amps = arr2D[detected_peaks]
  91. j, i = np.where(detected_peaks)
  92. # filter peaks
  93. amps = amps.flatten()
  94. peaks = zip(i, j, amps)
  95. peaks_filtered = [x for x in peaks if x[2] > amp_min] # freq, time, amp
  96. # get indices for frequency and time
  97. frequency_idx = [x[1] for x in peaks_filtered]
  98. time_idx = [x[0] for x in peaks_filtered]
  99. if plot:
  100. # scatter of the peaks
  101. fig, ax = plt.subplots()
  102. ax.imshow(arr2D)
  103. ax.scatter(time_idx, frequency_idx)
  104. ax.set_xlabel('Time')
  105. ax.set_ylabel('Frequency')
  106. ax.set_title("Spectrogram")
  107. plt.gca().invert_yaxis()
  108. plt.show()
  109. return zip(frequency_idx, time_idx)
  110. def generate_hashes(peaks, fan_value=DEFAULT_FAN_VALUE):
  111. """
  112. Hash list structure:
  113. sha1_hash[0:20] time_offset
  114. [(e05b341a9b77a51fd26, 32), ... ]
  115. """
  116. if PEAK_SORT:
  117. if PY3:
  118. peaks = sorted(peaks, key=itemgetter(1))
  119. else:
  120. peaks.sort(key=itemgetter(1))
  121. for i in range(len(peaks)):
  122. for j in range(1, fan_value):
  123. if (i + j) < len(peaks):
  124. freq1 = peaks[i][IDX_FREQ_I]
  125. freq2 = peaks[i + j][IDX_FREQ_I]
  126. t1 = peaks[i][IDX_TIME_J]
  127. t2 = peaks[i + j][IDX_TIME_J]
  128. t_delta = t2 - t1
  129. if t_delta >= MIN_HASH_TIME_DELTA and t_delta <= MAX_HASH_TIME_DELTA:
  130. if PY3:
  131. h = hashlib.sha1(
  132. ("%s|%s|%s" % (
  133. str(freq1),
  134. str(freq2),
  135. str(t_delta)
  136. )).encode()
  137. )
  138. else:
  139. h = hashlib.sha1(
  140. "%s|%s|%s" % (str(freq1), str(freq2), str(t_delta)))
  141. yield (h.hexdigest()[0:FINGERPRINT_REDUCTION], t1)