Skip to content

Commit

Permalink
update plugins
Browse files Browse the repository at this point in the history
  • Loading branch information
UranusYu committed Oct 16, 2023
1 parent efa2d4c commit 6f23864
Show file tree
Hide file tree
Showing 20 changed files with 511 additions and 266 deletions.
File renamed without changes.
22 changes: 13 additions & 9 deletions copilot/README.md → agent/README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
<!-- <p align="center"> <b> Music Pilot </b> </p> -->
<!-- <p align="center"> <b> Music Agent </b> </p> -->

<div align="center">

Expand All @@ -9,13 +9,16 @@

## Demo Video

![Download demo video](https://drive.google.com/file/d/1W0iJPHNPA6ENLJrPef0vtQytboSubxXe/view?usp=sharing)
[![Watch the video](https://img.youtube.com/vi/tpNynjdcBqA/maxresdefault.jpg)](https://youtu.be/tpNynjdcBqA)

## Features

- Accessibility: Music Pilot dynamically selects the most appropriate methods for each music-related task.
- Unity: Music Pilot unifies a wide array of tools into a single system, incorporating Huggingface models, GitHub projects, and Web APIs.
- Modularity: Music Pilot offers high modularity, allowing users to effortlessly enhance its capabilities by integrating new functions.
- Accessibility: Music Agent dynamically selects the most appropriate methods for each music-related task.
- Unity: Music Agent unifies a wide array of tools into a single system, incorporating Huggingface models, GitHub projects, and Web APIs.
- Modularity: Music Agent offers high modularity, allowing users to effortlessly enhance its capabilities by integrating new functions.

## Skills


## Installation

Expand All @@ -38,19 +41,20 @@ sudo apt-get install -y git-lfs
sudo apt-get install -y libsndfile1-dev
sudo apt-get install -y fluidsynth
sudo apt-get install -y ffmpeg
sudo apt-get install -y lilypond

# Clone the repository from TODO
git clone https://github.com/TODO
cd DIR
# Clone the repository from muzic
git clone https://github.com/microsoft/muzic
cd muzic/agent
```

Next, install the dependencies. pip may report some version conflicts, but they should not affect the functionality of the system.

```bash
pip install --upgrade pip

pip install -r requirements.txt
pip install semantic-kernel
pip install -r requirements.txt
pip install numpy==1.23.0
pip install protobuf==3.20.3
```
Expand Down
31 changes: 21 additions & 10 deletions copilot/agent.py → agent/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from model_utils import lyric_format
from plugins import get_task_map, init_plugins

class MusicPilotAgent:
class MusicAgent:
"""
Attributes:
config_path: A path to a YAML file, referring to the example config.yaml
Expand Down Expand Up @@ -64,7 +64,7 @@ def _init_logger(self):

def _init_semantic_kernel(self):
skills_directory = os.path.join(os.path.dirname(os.path.realpath(__file__)), "skills")
pilot_funcs = self.kernel.import_semantic_skill_from_directory(skills_directory, "MusicPilot")
pilot_funcs = self.kernel.import_semantic_skill_from_directory(skills_directory, "MusicAgent")

# task planning
self.task_planner = pilot_funcs["TaskPlanner"]
Expand Down Expand Up @@ -168,6 +168,9 @@ def collect_result(self, command, choose, inference_result):
return result

def run_task(self, input_text, command, results):
if self.error_event.is_set():
return

id = command["id"]
args = command["args"]
task = command["task"]
Expand Down Expand Up @@ -226,7 +229,7 @@ def run_task(self, input_text, command, results):
inference_result = []

for arg in command["args"]:
chat_input = f"[{input_text}] contains a task in JSON format {command}. Now you are a {command['task']} system, the arguments are {arg}. Just help me do {command['task']} and give me the resultwithout any additional description. The result must be in text form without any urls."
chat_input = f"[{input_text}] contains a task in JSON format {command}. Now you are a {command['task']} system, the arguments are {arg}. Just help me do {command['task']} and give me the result without any additional description."
response = self.skillchat(chat_input, self.chatbot, self.chat_context)
inference_result.append({"lyric":lyric_format(response)})

Expand Down Expand Up @@ -263,7 +266,12 @@ def run_task(self, input_text, command, results):
inference_result = self.model_inference(best_model_id, command, device=self.config["device"])

results[id] = self.collect_result(command, choose, inference_result)
return True
for result in inference_result:
if "error" in result:
self.error_event.set()
break

return

def chat(self, input_text):
start = time.time()
Expand All @@ -277,19 +285,22 @@ def chat(self, input_text):
except Exception as e:
self.logger.debug(e)
response = self.skillchat(input_text, self.chatbot, self.chat_context)
return response
return response, {"0": "Task parsing error, reply using ChatGPT."}

if len(tasks) == 0:
response = self.skillchat(input_text, self.chatbot, self.chat_context)
return response
return response, {"0": "No task detected, reply using ChatGPT."}

tasks = self.fix_depth(tasks)
results = {}
threads = []
d = dict()
retry = 0
self.error_event = threading.Event()
while True:
num_thread = len(threads)
if self.error_event.is_set():
break
for task in tasks:
# logger.debug(f"d.keys(): {d.keys()}, dep: {dep}")
for dep_id in task["dep"]:
Expand Down Expand Up @@ -326,21 +337,21 @@ def chat(self, input_text):
end = time.time()
during = end - start
self.logger.info(f"time: {during}s")
return response
return response, results

def parse_args():
parser = argparse.ArgumentParser(description="A path to a YAML file")
parser = argparse.ArgumentParser(description="music agent config")
parser.add_argument("--config", type=str, help="a YAML file path.")

args = parser.parse_args()
return args

if __name__ == "__main__":
args = parse_args()
agent = MusicPilotAgent(args.config, mode="cli")
agent = MusicAgent(args.config, mode="cli")
print("Input exit or quit to stop the agent.")
while True:
message = input("Send a message: ")
message = input("User input: ")
if message in ["exit", "quit"]:
break

Expand Down
File renamed without changes.
File renamed without changes.
205 changes: 205 additions & 0 deletions agent/gradio_agent.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,205 @@
import uuid
import os
import gradio as gr
import re
import requests
from agent import MusicAgent
import soundfile
import argparse


all_messages = []
OPENAI_KEY = ""


def add_message(content, role):
    """Append one chat turn to the module-level ``all_messages`` log.

    Args:
        content: Text of the message.
        role: Who produced it; callers in this file pass "user" or
            "assistant".
    """
    all_messages.append({"role": role, "content": content})


def extract_medias(message):
    """Find audio and symbolic-music file references in a chat message.

    Args:
        message: Free-form text that may mention file paths or URLs.

    Returns:
        A ``(audio_urls, symbolic_urls)`` tuple of lists. ``audio_urls``
        holds the distinct ``.flac``/``.wav``/``.mp3`` references and
        ``symbolic_urls`` the distinct ``.mid`` references, each in order
        of first appearance in ``message``.
    """
    audio_pattern = re.compile(r"(http(s?):|\/)?[a-zA-Z0-9\/.:-]*\.(flac|wav|mp3)")
    symbolic_pattern = re.compile(r"(http(s?):|\/)?[a-zA-Z0-9\/.:-]*\.(mid)")

    def unique_matches(pattern):
        # dict.fromkeys drops duplicates while keeping first-seen order, so
        # the result is deterministic (a set would shuffle it between runs).
        return list(dict.fromkeys(m.group(0) for m in pattern.finditer(message)))

    return unique_matches(audio_pattern), unique_matches(symbolic_pattern)


def set_openai_key(openai_key):
    """Validate the submitted OpenAI key and, if valid, hand it to the agent.

    The key is checked before anything is stored, so a malformed key never
    reaches ``agent._init_backend_from_input`` or the ``OPENAI_KEY`` global.

    Args:
        openai_key: Text submitted through the API-key textbox.

    Returns:
        A ``(textbox_value, window_update)`` tuple: the accepted key plus an
        update that shows the interaction window, or a hint message plus an
        update that hides it when the key does not start with "sk-".
    """
    global OPENAI_KEY
    if not openai_key.startswith("sk-"):
        return "OpenAI API Key starts with sk-", gr.update(visible=False)

    OPENAI_KEY = openai_key
    agent._init_backend_from_input(openai_key)
    return OPENAI_KEY, gr.update(visible=True)


def add_text(messages, message):
    """Add the user's message to the chat and fetch any remote audio it cites.

    Every ``http`` audio URL found in ``message`` is downloaded into
    ``agent.config['src_fold']`` under a short random name, and a note with
    that name is appended to the chat. A download that fails is reported in
    the chat instead of being written to disk.

    Args:
        messages: Current chatbot history, a list of (user, bot) pairs.
        message: Text the user just submitted.

    Returns:
        A ``(messages, "")`` tuple: the extended history and an empty string
        that clears the input textbox.
    """
    add_message(message, "user")
    messages = messages + [(message, None)]
    audio_urls, _ = extract_medias(message)

    for audio_url in audio_urls:
        if not audio_url.startswith("http"):
            continue

        ext = audio_url.split(".")[-1]
        # Convert to str before slicing: a UUID object is not subscriptable.
        name = f"{str(uuid.uuid4())[:4]}.{ext}"
        try:
            response = requests.get(audio_url, timeout=30)
            # Do not save an HTTP error page as if it were audio data.
            response.raise_for_status()
        except requests.RequestException as err:
            messages = messages + [(None, f"Failed to download {audio_url}: {err}")]
            continue

        with open(f"{agent.config['src_fold']}/{name}", "wb") as f:
            f.write(response.content)
        messages = messages + [(None, f"{audio_url} is saved as {name}")]

    return messages, ""


def upload_audio(file, messages):
    """Re-encode an uploaded audio file as wav and announce it in the chat.

    Args:
        file: Uploaded file object; its ``name`` attribute is read as the
            path of the audio to load.
        messages: Current chatbot history.

    Returns:
        The history extended with a note naming the stored wav file and an
        entry carrying the stored file's path.
    """
    stem = str(uuid.uuid4())[:4]
    samples, rate = soundfile.read(file.name)
    target = f"{agent.config['src_fold']}/{stem}.wav"
    soundfile.write(target, samples, samplerate=rate)

    notice = (None, f"Audio is stored in wav format as ** {stem}.wav **")
    player = (None, (target,))
    return messages + [notice, player]


def bot(messages):
    """Run the agent on the latest user message and render its reply.

    The reply text is written into the last chat entry. Each audio file it
    mentions is appended as a name note plus a file entry. Each MIDI file
    it mentions is appended as a name note plus whatever sheet-music PNGs
    could be rendered for it.

    Args:
        messages: Chatbot history; ``messages[-1][0]`` is the user text to
            answer and ``messages[-1][1]`` is filled in with the reply.

    Returns:
        A ``(messages, response)`` tuple: the extended history, and a
        single-entry chat list summarising each subtask result (long
        strings truncated), ordered by numeric subtask id.
    """
    message, results = agent.chat(messages[-1][0])

    audio_urls, symbolic_urls = extract_medias(message)
    add_message(message, "assistant")
    messages[-1][1] = message

    src_fold = agent.config['src_fold']

    for audio_url in audio_urls:
        # Bare file names are resolved against the agent's working folder.
        if not audio_url.startswith("http") and not audio_url.startswith(src_fold):
            audio_url = os.path.join(src_fold, audio_url)
        messages = messages + [(None, f"** {audio_url.split('/')[-1]} **"),
                               (None, (audio_url,))]

    for symbolic_url in symbolic_urls:
        if not symbolic_url.startswith(src_fold):
            symbolic_url = os.path.join(src_fold, symbolic_url)

        _render_sheet_music(symbolic_url)
        messages = messages + [(None, f"** {symbolic_url.split('/')[-1]} **")]

        if os.path.exists(f"{symbolic_url}.png"):
            messages = messages + [(None, (f"{symbolic_url}.png",))]
        else:
            # No single-image output: look for numbered page images instead.
            s_page = 1
            while os.path.exists(f"{symbolic_url}-page{s_page}.png"):
                messages = messages + [(None, (f"{symbolic_url}-page{s_page}.png",))]
                s_page += 1

    results = _truncate_strings(results)
    results = sorted(results.items(), key=lambda x: int(x[0]))
    response = [(None, "\n\n".join([f"Subtask {r[0]}:\n{r[1]}" for r in results]))]

    return messages, response


def _render_sheet_music(midi_path):
    """Best-effort conversion of a MIDI file to PNG via midi2ly and lilypond."""
    import subprocess

    try:
        # Argument lists (no shell) keep a path taken from model output from
        # being interpreted as shell syntax.
        subprocess.run(["midi2ly", midi_path, "-o", f"{midi_path}.ly"], check=False)
        subprocess.run(["lilypond", "-f", "png", "-o", midi_path, f"{midi_path}.ly"], check=False)
    except OSError:
        # The tools could not be started (e.g. not installed); the caller
        # then simply finds no PNG to display.
        return


def _truncate_strings(obj, max_length=128):
    """Return ``obj`` with every nested string longer than ``max_length`` cut and suffixed with "..."."""
    if isinstance(obj, str):
        return obj[:max_length] + "..." if len(obj) > max_length else obj
    if isinstance(obj, dict):
        return {key: _truncate_strings(value, max_length) for key, value in obj.items()}
    if isinstance(obj, list):
        return [_truncate_strings(item, max_length) for item in obj]
    return obj


def clear_all_history(messages):
    """Clear the agent's history and note that in the chat.

    Args:
        messages: Current chatbot history.

    Returns:
        The history extended with a confirmation entry.
    """
    agent.clear_history()
    notice = (None, "All LLM history cleared")
    return messages + [notice]

def parse_args():
    """Parse the command line of the Gradio front end.

    Returns:
        The parsed namespace; ``config`` holds the value given with
        ``-c``/``--config`` (a YAML file path), or ``None`` if absent.
    """
    cli = argparse.ArgumentParser(description="music agent config")
    cli.add_argument("-c", "--config", type=str, help="a YAML file path.")
    return cli.parse_args()


if __name__ == "__main__":
    # Entry point of the Gradio demo: build the agent from the YAML config,
    # lay out the UI, wire the callbacks, and launch the app.
    args = parse_args()
    agent = MusicAgent(args.config, mode="gradio")

    with gr.Blocks() as demo:
        # The badge links to the `agent` directory, the project's location
        # after the copilot -> agent rename.
        gr.HTML("""
            <h1 align="center" style=" display: flex; flex-direction: row; justify-content: center; font-size: 25pt; ">🎧 Music Agent</h1>
            <h3>This is a demo page for Music Agent, a project that uses LLM to integrate music tools. For specific functions, please refer to the examples given below, or refer to the instructions in Github.</h3>
            <h3>Make sure the uploaded audio resource is in flac|wav|mp3 format.</h3>
            <h3>Due to RPM limitations, Music Agent requires an OpenAI key for the paid version.</h3>
            <div style="display: flex;"><a href='https://github.com/microsoft/muzic/tree/main/agent'><img src='https://img.shields.io/badge/Github-Code-blue'></a></div>
        """)

        with gr.Row():
            openai_api_key = gr.Textbox(
                show_label=False,
                placeholder="Set your OpenAI API key here and press Enter",
                lines=1,
                type="password",
            )
        state = gr.State([])

        # Created hidden; set_openai_key returns the visibility update for it.
        with gr.Row(visible=False) as interact_window:

            with gr.Column(scale=0.7, min_width=500):
                chatbot = gr.Chatbot([], elem_id="chatbot", label="Music-Agent Chatbot").style(height=500)

                with gr.Tab("User Input"):
                    with gr.Row(scale=1):
                        with gr.Column(scale=0.6):
                            txt = gr.Textbox(show_label=False, placeholder="Press ENTER or click the Run button. You can start by asking 'What can you do?'").style(container=False)
                        with gr.Column(scale=0.1, min_width=0):
                            run = gr.Button("🏃‍♂️Run")
                        with gr.Column(scale=0.1, min_width=0):
                            clear_txt = gr.Button("🔄Clear️")
                        with gr.Column(scale=0.2, min_width=0):
                            btn = gr.UploadButton("☁️Upload Audio", file_types=["audio"])

            with gr.Column(scale=0.3, min_width=300):
                with gr.Tab("Intermediate Results"):
                    response = gr.Chatbot([], label="Current Progress").style(height=400)

        openai_api_key.submit(set_openai_key, [openai_api_key], [openai_api_key, interact_window])
        clear_txt.click(clear_all_history, [chatbot], [chatbot])

        btn.upload(upload_audio, [btn, chatbot], [chatbot])
        # Run button and Enter share one pipeline: add_text records the user
        # turn, then bot fills in the reply and the intermediate results.
        run.click(add_text, [chatbot, txt], [chatbot, txt]).then(
            bot, chatbot, [chatbot, response]
        )
        txt.submit(add_text, [chatbot, txt], [chatbot, txt]).then(
            bot, chatbot, [chatbot, response]
        )

        gr.Examples(
            examples=["What can you do?",
                      "Write a piece of lyric about the recent World Cup.",
                      "生成一首古风歌词的中文歌",
                      "Download a song by Jay Chou for me and separate the vocals and the accompanies.",
                      "Convert the vocals in /b.wav to a violin sound.",
                      "Give me the sheet music and lyrics in the song /a.wav",
                      "近一个月流行的音乐类型",
                      "把c.wav中的人声搭配合适的旋律变成一首歌"
                      ],
            inputs=txt
        )

    demo.launch(share=True)
File renamed without changes.
3 changes: 3 additions & 0 deletions copilot/models/download.sh → agent/models/download.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,11 @@
# Set models to download
models=(
"m3hrdadfi/wav2vec2-base-100k-gtzan-music-genres"
"lewtun/distilhubert-finetuned-music-genres"
"dima806/music_genres_classification"
"sander-wood/text-to-music"
"jonatasgrosman/whisper-large-zh-cv11"
"cvssp/audioldm-m-full"
)

# Set the current directory
Expand Down
Loading

0 comments on commit 6f23864

Please sign in to comment.