-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathfuzzySearch.py
110 lines (79 loc) · 2.4 KB
/
fuzzySearch.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
#!/usr/bin/env python
"""
Uses the Jaro similarity (a fuzzy string match) to find a given song in the
music library. The library is assumed to be laid out like this:
    LIBRARY_PATH/Artist Name/Album Name/song_name.mp3
Ahmed azaan
azaan@outlook.com
"""
import os
import jellyfish
from string import digits
from operator import itemgetter
# Root directory of the music library, read from the SONG_LIBRARY environment
# variable. Fall back to an empty string when the variable is unset so that
# importing this module does not fail with "object of type 'NoneType' has no
# len()"; walking an empty path yields no directories, i.e. an empty library.
LIBRARY_PATH = os.environ.get('SONG_LIBRARY') or ''
# Length of the library prefix; used to slice the artist directory out of the
# directory names produced while walking the library.
PL = len(LIBRARY_PATH)
# Running counter handed out to Song instances as their numeric id.
index = 0
class Song(object):
    """A single file found in the music library.

    Attributes:
        path: ``basepath`` and ``filename`` concatenated.
        artist: artist name as supplied by the caller.
        filename: file name exactly as it was passed in.
        index: sequential id taken from the module-level ``index`` counter.
        str: normalized name used for fuzzy matching (see ``_normalize``).
    """

    def __init__(self, basepath, filename, artist):
        global index
        self.path = basepath + filename
        self.artist = artist
        self.filename = filename
        # Take the next id from the module-wide counter.
        self.index = index
        index += 1
        self.str = self._normalize(filename)

    @staticmethod
    def _normalize(filename):
        """Return ``filename`` reduced to the text worth matching against.

        Drops a trailing ".mp3" (any letter case), removes every digit
        (track numbers and the like), trims surrounding whitespace and
        lower-cases the result.
        """
        name = filename
        # The extension must go BEFORE the digits are removed: deleting the
        # digits first turns ".mp3" into ".mp", which would then never match.
        if name.lower().endswith(".mp3"):
            name = name[:-len(".mp3")]
        # A plain character filter works for both byte and unicode strings,
        # unlike the Python-2-only str.translate(None, digits).
        name = "".join(ch for ch in name if ch not in digits)
        return name.strip().lower()

    def __str__(self):
        """Return the normalized name."""
        return self.str
# Module-level cache shared by the lookup functions below.
# ``loaded`` records whether the library has already been scanned;
# ``songs`` is the list that all Song objects are loaded into.
loaded = False
songs = list()
def loadDir():
    """Scan LIBRARY_PATH once and fill the module-level ``songs`` list.

    Every file found below LIBRARY_PATH becomes a Song. The artist is the
    first path component below the library root (an empty string for files
    that sit directly in the root). Calls after the first one return
    immediately because ``loaded`` is already set.
    """
    global songs, loaded
    # Load only once
    if loaded:
        return
    for dirname, subdirs, files in os.walk(LIBRARY_PATH):
        # Sorting the sub-directories in place makes os.walk descend in a
        # fixed order; together with sorted(files) this keeps the ids handed
        # out to songs the same from one scan to the next instead of
        # depending on the order the filesystem lists entries in.
        subdirs.sort()
        # Derive the artist from the path relative to the library root.
        # Slicing by the root's length drops the artist's first character
        # when LIBRARY_PATH ends with a separator.
        rel = os.path.relpath(dirname, LIBRARY_PATH)
        artist = "" if rel == os.curdir else rel.split(os.sep)[0]
        # Directory name with exactly one trailing separator.
        basepath = os.path.join(dirname, "")
        for f in sorted(files):
            songs.append(Song(basepath, f, artist))
    loaded = True
def getFromId(sid):
    """Fetch one song by its position in the loaded ``songs`` list.

    The library is loaded first if that has not happened yet. The result is
    a one-element list holding a dict with the keys ``index``, ``filename``,
    ``artist``, ``path`` and ``match``; ``match`` is always 100. Indexing is
    plain list indexing, so an out-of-range ``sid`` raises IndexError.
    """
    loadDir()
    entry = songs[sid]
    record = dict(
        index=entry.index,
        filename=entry.filename,
        artist=entry.artist,
        path=entry.path,
        match=100,
    )
    return [record]
def search(string, psi=0.90, limit=10, searchall=False):
    """Search the library for songs whose normalized name resembles ``string``.

    Args:
        string: text to look for; compared in lower case.
        psi: similarity threshold; only songs scoring strictly above it are
            reported.
        limit: maximum number of matches to collect when ``searchall`` is
            false.
        searchall: when true, ignore ``limit`` and scan every song.

    Returns:
        A list of dicts with the keys ``index``, ``filename``, ``artist``,
        ``path`` and ``match`` (similarity as a percentage), sorted by
        ``match`` in descending order. When ``searchall`` is false the scan
        stops as soon as ``limit`` matches are collected, so the result holds
        the first ``limit`` matches in library order, not necessarily the
        best ones overall.
    """
    loadDir()
    # Newer jellyfish releases call this function jaro_similarity and no
    # longer ship jaro_distance; use whichever one is available.
    jaro = (getattr(jellyfish, 'jaro_similarity', None)
            or jellyfish.jaro_distance)
    # Lower-case the query once instead of once per song.
    needle = string.lower()
    res = []
    for song in songs:
        # Checked before scoring so that at most ``limit`` matches are ever
        # returned (the previous ``count > limit`` test let one extra through).
        if not searchall and len(res) >= limit:
            break
        match = jaro(needle, str(song))
        if match > psi:
            res.append({'index': song.index, 'filename': song.filename,
                        'artist': song.artist, 'path': song.path,
                        'match': match * 100})
    return sorted(res, key=itemgetter('match'), reverse=True)
if __name__ == '__main__':
    # Read one query line from stdin and print each match on its own line.
    # raw_input only exists on Python 2; on Python 3 the same function is
    # called input.
    try:
        read_line = raw_input
    except NameError:
        read_line = input
    val = read_line()
    # A parenthesized single argument prints identically on Python 2 and 3.
    print('\n'.join([str(x) for x in search(val)]))