Skip to content

Commit

Permalink
working on app
Browse files Browse the repository at this point in the history
  • Loading branch information
dkohlsdorf committed Aug 13, 2023
1 parent cf70408 commit ec7221c
Show file tree
Hide file tree
Showing 4 changed files with 92 additions and 23 deletions.
30 changes: 27 additions & 3 deletions app.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from flask import Flask, render_template, flash, redirect, request


VERSION = 'sep_2022'
VERSION = 'no_echo'
SEQ_PATH = f'../web_service/{VERSION}/sequences/'
IMG_PATH = f'../web_service/{VERSION}/images/'
PKL_PATH = f'../web_service/{VERSION}/service.pkl'
Expand Down Expand Up @@ -162,12 +162,35 @@ def discovery():
return render_template('discovery.html', sequences=sequences, n=len(sequences), keys = s[2])


@app.route('/query_relaxed', methods=['POST'])
@flask_login.login_required
def upload_relaxed():
    """Store an uploaded wav file and render its relaxed-query results.

    The upload is read from the ``file`` field of the multipart form. When
    the field is missing or the name does not end in ``.wav`` a message is
    flashed and the client is redirected to ``/discovery``.

    Otherwise the file is saved below ``UPLOAD_PATH``, passed to
    ``DISCOVERY.query_by_file(path, True)``, the joined decoding is recorded
    through ``QueryHistory().insert`` and ``discovery.html`` is rendered with
    the processed neighbour sequences.
    """
    # Function-scope import so the fix does not depend on the module header.
    import os

    # The route only accepts POST, so no extra method check is required.
    print(request.files)
    if 'file' not in request.files:
        flash('No File Uploaded')
        return redirect('/discovery')

    file = request.files['file']
    # The file name is chosen by the client: keep only its last path
    # component so names such as "../../x.wav" cannot escape UPLOAD_PATH.
    # Backslashes are normalised first so Windows-style separators are
    # stripped as well; a missing name is treated as empty and rejected.
    filename = os.path.basename((file.filename or '').replace('\\', '/'))
    if not filename.endswith('.wav'):
        flash('Only wav files are allowed')
        return redirect('/discovery')

    path = f"{UPLOAD_PATH}/{filename}"
    file.save(path)
    print("Done Upload")

    # Second argument True requests the relaxed lookup
    # (NOTE(review): assumes DISCOVERY is a DiscoveryService - confirm).
    img, decoding, nn, keys = DISCOVERY.query_by_file(path, True)
    decoding = " ".join(decoding)

    history = QueryHistory()
    history.insert(decoding, filename)

    sequences = [process_sequence(x) for x in nn]
    return render_template('discovery.html', sequences=sequences, n=len(sequences), keys = keys, query=(img, decoding))

@app.route('/query', methods=['POST'])
@flask_login.login_required
def upload():
if request.method == 'POST':
print(request.files)

if 'file' not in request.files:
flash('No File Uploaded')
return redirect('/discovery')
Expand All @@ -177,7 +200,8 @@ def upload():
return redirect('/discovery')
path = f"{UPLOAD_PATH}/{file.filename}"
file.save(path)
img, decoding, nn, keys = DISCOVERY.query_by_file(path)
print("Done Upload")
img, decoding, nn, keys = DISCOVERY.query_by_file(path, False)
decoding = " ".join(decoding)

history = QueryHistory()
Expand Down
40 changes: 26 additions & 14 deletions decoder_worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@

from fastavro import writer, reader, parse_schema
from scipy.io.wavfile import write

ADDR = 'localhost:50051'
VERSION = 'no_echo'
SEQ_PATH = f'../web_service/{VERSION}/sequences/'
Expand Down Expand Up @@ -170,7 +171,7 @@ def subsequences(sequence, max_len=8):
for i in range(length, n):
substring = " ".join([s['cls'] for s in sequence[i-length:i]])
yield substring


class DiscoveryService:

Expand All @@ -181,6 +182,9 @@ def __init__(self, sequence_path, img_path, limit = None):
self.decodings = []
self.encounter_ids = []

# TODO ts id extern index -> sequence
self.inverted_idx = {}

self.densities = {}
self.neighbors = {}
self.substrings = {}
Expand All @@ -194,6 +198,7 @@ def __init__(self, sequence_path, img_path, limit = None):
self.parse(sequence_path, limit)
self.setup_discovery()
self.setup_substrings()
self.setup_inverted()
self.sequence_path = sequence_path
self.img_path = img_path

Expand All @@ -202,8 +207,9 @@ def init_model(self, model_path):
self.lab = pkl.load(open(f"{model_path}/labels.pkl", "rb"))
self.reverse = {v:k for k, v in self.lab.items()}
self.label_mapping = pkl.load(open(f'{model_path}/label_mapping.pkl', 'rb'))
load(ADDR, VERSION)

def parse(self, sequence_path, limit):
def parse(self, sequence_path, limit):
for file in os.listdir(sequence_path):
eid = file.replace('.avro', '')
print(f" ... reading: {file} {eid}")
Expand All @@ -214,7 +220,7 @@ def parse(self, sequence_path, limit):
avro_reader = reader(fo)
for record in avro_reader:
self.sequences.append(record)
self.encounter_ids.append(eid)
self.encounter_ids.append(eid)

def setup_substrings(self):
for i, sequence in enumerate(self.sequences):
Expand All @@ -223,7 +229,12 @@ def setup_substrings(self):
if sub not in self.substrings:
self.substrings[sub] = []
self.substrings[sub].append(i)


def setup_inverted(self):
    """Fill ``self.inverted_idx`` with ``proba id -> sequence position``.

    Every id listed under ``'proba_ids'`` of a record in ``self.sequences``
    is mapped to the position of that record in the list. When an id occurs
    in several records the last one wins.
    """
    for position, record in enumerate(self.sequences):
        self.inverted_idx.update(dict.fromkeys(record['proba_ids'], position))

def setup_discovery(self):
for key, sequence in enumerate(self.sequences):
decoded = [DecodedSymbol.from_dict(x) for x in sequence['sequence']]
Expand Down Expand Up @@ -259,8 +270,8 @@ def sample(self):
keys = [neighbor for _, neighbor in self.neighbors[region]]
nn = [self.sequences[neighbor] for neighbor in keys]
return self.sequences[region], nn, keys

def query_by_file(self, filename):
def query_by_file(self, filename, relax=False):
name = str(filename).split('/')[-1].split('.')[0]
query_id = f"query_{name}"
audio = raw(filename)
Expand All @@ -274,26 +285,27 @@ def query_by_file(self, filename):

n = len(probs)
probas = []
for i in range(100, n, 50):
probas.append(probs[i-100:i])
ids = insert_all(probas, ADDR)

for i in range(100, n, 10):
probas.append(probs[i-100:i])

records = [{
"path": name,
"start": start_bound,
"stop": stop_bound,
"sequence": [token.to_dict() for token in c],
"proba_ids": ids
"proba_ids": []
}]
with open(f'{self.sequence_path}/{query_id}.avro', 'wb') as out:
writer(out, SCHEMA, records)

decoded = [DecodedSymbol.from_dict(x) for x in records[0]['sequence']]
neighbors = query(decoded, self.decodings, self.db)
if relax:
neighbors = find_relaxed(ADDR, VERSION, probas, self.inverted_idx)
else:
neighbors = query(decoded, self.decodings, self.db)
keys = [neighbor for _, neighbor in neighbors]
nn = [self.sequences[neighbor] for neighbor in keys]
return f"{query_id}.png", [s.cls for s in decoded], nn, keys

return f"{query_id}.png", [s.cls for s in decoded], nn, keys

def get(self, region):
keys = [neighbor for _, neighbor in self.neighbors[region]]
Expand Down
30 changes: 28 additions & 2 deletions lib_dolphin/extern_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
import indexing_pb2
import indexing_pb2_grpc

from collections import defaultdict

SIL = True

def timeseries(ts):
Expand Down Expand Up @@ -43,9 +45,33 @@ def reindex(addr, name):
response = stub.save(indexing_pb2.SaveIndexRequest(name = "name"))
print("saving done")


def load(addr, name):
    """Ask the time series service at ``addr`` to load the index ``name``.

    Opens an insecure gRPC channel to ``addr``, sends a ``LoadIndexRequest``
    carrying ``name`` and closes the channel again. The service response is
    not inspected and nothing is returned.
    """
    with grpc.insecure_channel(addr) as channel:
        # One stub per channel is enough; the duplicated construction and the
        # unused ``response`` local were dropped.
        stub = indexing_pb2_grpc.TimeSeriesServiceStub(channel)
        stub.load(indexing_pb2.LoadIndexRequest(name = name))


def find_relaxed(addr, name, sequences, inverted_idx, k = 10):
    """Rank stored sequences by how often the index returns their ids.

    Each entry of ``sequences`` is converted with ``timeseries`` and sent to
    the service at ``addr``. Every returned id that is present in
    ``inverted_idx`` counts as one vote for the sequence position it maps
    to; ids that are absent are collected and printed for diagnosis.

    Args:
        addr: address of the gRPC time series service.
        name: not used by this function.
        sequences: iterable of query windows accepted by ``timeseries``.
        inverted_idx: mapping from index id to sequence position.
        k: maximum number of ranked results to return.

    Returns:
        Up to ``k`` tuples ``(1.0 / votes, sequence_position)``, ordered by
        descending vote count.
    """
    votes = defaultdict(int)
    unknown_ids = set()
    n_known = 0
    n_unknown = 0

    with grpc.insecure_channel(addr) as channel:
        stub = indexing_pb2_grpc.TimeSeriesServiceStub(channel)
        for window in sequences:
            response = stub.query(timeseries(window))
            for ts_id in response.ids:
                if ts_id in inverted_idx:
                    votes[inverted_idx[ts_id]] += 1
                    n_known += 1
                    continue
                n_unknown += 1
                unknown_ids.add(ts_id)

    # Most voted sequence first; ties keep the order of first appearance.
    ranked = sorted(votes.items(), key=lambda item: -item[1])[:k]

    print(f"{n_known} / {n_unknown}: {len(unknown_ids)}")
    print(unknown_ids)
    return [(1.0 / count, position) for position, count in ranked]
15 changes: 11 additions & 4 deletions templates/discovery.html
Original file line number Diff line number Diff line change
Expand Up @@ -40,11 +40,18 @@ <h3> ERRORS: <h3>
</tr>
<tr>
<td>
<form action='/query' method='post' enctype='multipart/form-data'>
<input type="file" name="file">
<input type='submit' value='Upload'/>
</form>
Relaxed: <form action='/query_relaxed' method='post' enctype='multipart/form-data'>
<input type="file" name="file">
<input type='submit' value='Upload'/>
</form>
</td>
<td>
Constrained: <form action='/query' method='post' enctype='multipart/form-data'>
<input type="file" name="file">
<input type='submit' value='Upload'/>
</form>
</td>

</tr>
<tr>
<td>
Expand Down

0 comments on commit ec7221c

Please sign in to comment.