In [None]:
# Using Python 3.9.6 venv
%pip install -q faster-whisper pydub pyaudio onnxruntime numpy ipywidgets ollama python_weather
%pip install -q --no-deps piper-phonemize-cross piper-tts
import typing
import asyncio
from IPython.display import Audio as DisplayAudio

In [None]:
import pyaudio
import numpy as np 
import numpy.typing as npt

# Module-level handle to the asyncio event loop; other cells in this notebook
# schedule background tasks on it via `event_loop.create_task(...)`.
event_loop = asyncio.get_event_loop()

class Audio:
    """Microphone capture and speaker playback through PyAudio.

    The class attributes are the settings `record` opens its input stream
    with; `FORMAT` and `RATE` are also the defaults for `play`.
    """

    FORMAT = pyaudio.paInt16
    CHANNELS = 1
    RATE = 16000  # 16 kHz
    FRAMES_PER_BUFFER = 512
    # Index of the PyAudio device `record` captures from.
    INPUT_DEVICE_INDEX = 0

    def __init__(self):
        self.audio = pyaudio.PyAudio()

    async def record(self, stop: asyncio.Event) -> npt.NDArray:
        """Capture audio until `stop` is set and return it as int16 samples.

        Scheduling this coroutine as a task starts the recording and hands
        control back to the caller; awaiting that task keeps recording until
        the `stop` event is set.

        Args:
            stop: Event checked between buffer reads; set it to end the recording.

        Returns:
            All captured frames concatenated and viewed as an int16 array.
        """
        frames = []
        input_stream: pyaudio.Stream = self.audio.open(
            format=self.FORMAT,
            channels=self.CHANNELS,
            rate=self.RATE,
            input=True,
            input_device_index=self.INPUT_DEVICE_INDEX,
            frames_per_buffer=self.FRAMES_PER_BUFFER,
        )
        try:
            while not stop.is_set():
                frames.append(input_stream.read(self.FRAMES_PER_BUFFER))
                # Yield to the event loop so other tasks (such as the one that
                # sets `stop`) get a chance to run between reads.
                await asyncio.sleep(0)
        finally:
            # Release the device even if a read fails or the task is cancelled.
            input_stream.stop_stream()
            input_stream.close()
        return np.frombuffer(b"".join(frames), dtype=np.int16)

    def play(self, frames: bytes, format: int = FORMAT, rate: int = RATE) -> None:
        """Write raw `frames` to an output stream opened with `format` and `rate`.

        Args:
            frames: Raw audio bytes to write to the output stream.
            format: PyAudio sample format constant; defaults to `FORMAT`.
            rate: Sample rate in Hz; defaults to `RATE`.
        """
        output_stream: pyaudio.Stream = self.audio.open(
            format=format, channels=self.CHANNELS, rate=rate, output=True
        )
        try:
            output_stream.write(frames)
        finally:
            # Close the stream even when the write raises.
            output_stream.close()

async def record_for_duration(audio: "Audio", seconds: float):
    """Record from `audio` for roughly `seconds` seconds and return the result.

    Starts `audio.record` as a background task, sleeps for the requested
    duration, then sets the stop event and waits for the recording to finish.

    Args:
        audio: Object exposing an async `record(stop)` method.
        seconds: How long to let the recording run before stopping it.

    Returns:
        Whatever `audio.record` returns.
    """
    stop = asyncio.Event()
    # Schedule on the running loop so the recording proceeds while we sleep.
    recording = asyncio.create_task(audio.record(stop))
    try:
        # Honour the requested duration (this used to be a fixed 3 seconds).
        await asyncio.sleep(seconds)
    finally:
        # Always signal the recorder, so it stops even if the sleep is cancelled.
        stop.set()
    return await recording

async def audio_demo():
    """Record a short clip from the microphone and show an inline player for it."""
    recorder = Audio()
    clip = await record_for_duration(recorder, 3)
    player = DisplayAudio(data=clip, rate=recorder.RATE)
    display(player)
# Run the recording demo. Top-level `await` is valid in an IPython/Jupyter
# cell, not in a plain Python script.
await audio_demo()

In [None]:

from piper import PiperVoice

# Piper voice model file; its config is loaded from the same path with a
# ".json" suffix appended. Both are relative paths.
piper_model = "en_GB-alba-medium.onnx"
piper_voice = PiperVoice.load(piper_model, config_path="%s.json" % piper_model)

def verbalise(string: str) -> npt.NDArray:
    """Synthesize `string` with the shared Piper voice and return int16 samples.

    Args:
        string: Text to speak.

    Returns:
        The raw audio chunks streamed by Piper, concatenated and viewed as an
        int16 array.
    """
    options = {"sentence_silence": 0.0}
    pcm = bytearray()
    for chunk in piper_voice.synthesize_stream_raw(string, **options):
        pcm.extend(chunk)
    return np.frombuffer(pcm, dtype=np.int16)

def verbalisation_demo():
    """Speak a fixed sample sentence and render an inline audio player for it."""
    sample_text = "This is a verbalisation example. One two three four five. Easy, eh?"
    samples = verbalise(sample_text)
    sample_rate = piper_voice.config.sample_rate
    display(DisplayAudio(data=samples, rate=sample_rate))

# Synthesize the sample sentence and show an audio player for it.
verbalisation_demo()

In [None]:
from faster_whisper import WhisperModel

# Shared speech-to-text model: the "turbo" Whisper model with int8 compute type.
whisper_model = WhisperModel(model_size_or_path="turbo", compute_type="int8")

def transcribe(nd_array: npt.NDArray) -> str:
    """Transcribe English speech in `nd_array` using the shared Whisper model.

    Args:
        nd_array: Audio samples. Signed-integer PCM (for example int16) is
            rescaled to float32 in [-1.0, 1.0) before transcription; arrays of
            any other dtype are passed through untouched.

    Returns:
        The text of all segments joined together, with surrounding whitespace
        stripped.
    """
    # faster-whisper's own audio decoder hands the model float32 samples scaled
    # to [-1, 1]; raw int16 PCM is 32768x louder than that, so normalise it.
    if np.issubdtype(nd_array.dtype, np.signedinteger):
        full_scale = float(np.iinfo(nd_array.dtype).max) + 1.0
        nd_array = nd_array.astype(np.float32) / full_scale
    # NOTE(review): Whisper presumably expects 16 kHz input; no resampling is
    # done here, so confirm the sample rate of whatever is passed in.
    segments, _info = whisper_model.transcribe(nd_array, language="en")
    # Join whole segment texts (the old `list += str` appended character by
    # character, which only worked by accident).
    return "".join(segment.text for segment in segments).strip()

def transcribe_demo():
    """Round-trip a sample sentence through `verbalise` and `transcribe`, printing both."""
    spoken_text = "This is a verbalisation example. One two three four five. Easy, eh?"
    heard_text = transcribe(verbalise(spoken_text))
    report = (
        ("verbalise_input", spoken_text),
        ("transcribe_output", heard_text),
    )
    for label, value in report:
        print("%s = %s" % (label, value))

# Round trip: synthesize the sample sentence, transcribe it, and print both.
transcribe_demo()

In [None]:
async def parrot_demo():
    """Record speech, transcribe it, and speak the transcription back.

    Shows an audio player for the recording, prints the transcription, then
    shows a second player for the synthesized echo.
    """
    microphone = Audio()
    clip = await record_for_duration(microphone, 3)
    display(DisplayAudio(data=clip, rate=microphone.RATE))

    heard = transcribe(clip)
    print(f"transcribed = {heard}")

    echo = verbalise(f"the human says: {heard}")
    display(DisplayAudio(data=echo, rate=piper_voice.config.sample_rate))

# Record, transcribe, and speak the transcription back (top-level `await`
# works in an IPython/Jupyter cell only).
await parrot_demo()

In [None]:
from ollama import Client

# Model tag used for every chat request, and a client pointed at an Ollama
# server on localhost:11434.
ollama_model = 'qwen2.5:7b'
ollama_client = Client(host="http://localhost:11434")

def prompt(user_prompt: str) -> str:
    """Send a single, stateless question to the model and return its reply.

    Each call sends only a fixed system message plus `user_prompt`; nothing
    from earlier calls is included.

    Args:
        user_prompt: The question or instruction to send as the user message.

    Returns:
        The content of the model's reply message.
    """
    messages = [
        # Spelling fixed ("sentance" -> "sentence"): this text is sent to the model.
        {"role": "system", "content": "Limit responses to one sentence."},
        {"role": "user", "content": user_prompt},
    ]
    response = ollama_client.chat(model=ollama_model, messages=messages)
    return response.message.content

def prompt_demo():
    """Ask the model one fixed question and print the question with its answer."""
    question = "Why is the sky blue?"
    answer = prompt(question)
    print(f"prompt: {question}\nresponse: {answer}")

# Ask the model a single question; needs the Ollama server to be reachable.
prompt_demo()

In [None]:
def follow_up_demo():
    """Ask two related questions through `prompt` and print each exchange.

    The second question ("What about sand?") only makes sense in light of the
    first one, so printing both replies side by side shows how the model
    handles a follow-up asked through `prompt`.
    """
    questions = ("What colour is grass?", "What about sand?")
    for number, question in enumerate(questions, start=1):
        # Label each exchange with its own number (both used to be printed
        # as "prompt 1 ... response 2").
        print("prompt %d: %s\nresponse %d: %s" % (number, question, number, prompt(question)))

# Ask two related questions, each through a separate prompt() call.
follow_up_demo()

In [None]:
class Conversation:
    """Ordered list of chat messages, starting with a system prompt.

    `state` is a list of message dicts (each with at least `role` and
    `content`) that can be handed to a chat client as its `messages`.
    """

    DEFAULT_SYSTEM_PROMPT = (
        "Be brief. Do not use formatting. Prefer metric units over imperial units."
    )

    def __init__(self, system_prompt: str = DEFAULT_SYSTEM_PROMPT):
        """Start a conversation whose first message is `system_prompt`."""
        self.state = []
        self._append(role="system", content=system_prompt)

    def user(self, content: str):
        """Append a message with role "user"."""
        self._append(role="user", content=content)

    def assistant(self, content: str):
        """Append a message with role "assistant"."""
        self._append(role="assistant", content=content)

    def tool(self, content: str, name: str, args: typing.Any = None):
        """Append a tool-result message.

        Args:
            content: The tool output, already rendered as text.
            name: Name of the tool that produced the output.
            args: The arguments the tool was called with, stored as given.
        """
        self._append(role="tool", content=content, name=name, args=args)

    def _append(self, **kwargs):
        """Append one message built from the given keyword arguments."""
        self.state.append(dict(kwargs))

    # Backward-compatible alias for the old name; double-underscore-wrapped
    # names are reserved for Python itself, so new code should use `_append`.
    __append__ = _append

def conversation_demo():
    """Hold a two-turn chat that keeps history, then display the full message list.

    Each question is appended to the conversation before the model is called
    with the whole history, and each reply is appended afterwards.
    """
    conversation = Conversation('Be direct.')
    questions = ('How many legs does a spider have?', 'What about a dog?')

    for question in questions:
        conversation.user(question)
        reply = ollama_client.chat(model=ollama_model, messages=conversation.state)
        conversation.assistant(reply.message.content)

    display(conversation.state)

# Run the two-turn chat and display the accumulated message list.
conversation_demo()

In [None]:
# NOTE: this function is passed to the chat client as a tool, and its
# docstring presumably doubles as the description the model sees, so the
# wording below is kept stable. It is `async` because `Tool.call` awaits it.
async def how_many_letters(word: str, letter: str):
    """
    Identify how many letters are in a word. This is critical information,
    it's imperative that you call this tool to get the correct answer.
    Args:
        word (str): The word that contains a number of letters
        letter (str): A single character that may be present in the word
    Returns:
        A number representing how many times the letter appears in the word
    """
    # Compare case-insensitively by lowering both sides first.
    haystack = word.lower()
    needle = letter.lower()
    return haystack.count(needle)

In [None]:
import json

class Tool:
    """Wraps a callable so a model-requested call can be run and recorded."""

    def __init__(self, function):
        """Store `function`; its `__name__` identifies the tool."""
        self.function = function

    async def call(self, arguments, conversation):
        """Invoke the wrapped function and append its result to `conversation`.

        Args:
            arguments: Mapping of keyword arguments to call the function with.
            conversation: Object with a `tool(content, name, args)` method that
                records the result.

        The result is stored as JSON text when it is a dict and as `str(...)`
        otherwise.
        """
        import inspect  # local import: only this method needs it

        output = self.function(**arguments)
        # Accept plain functions as well as coroutine functions.
        if inspect.isawaitable(output):
            output = await output
        content = json.dumps(output) if isinstance(output, dict) else str(output)
        conversation.tool(content, self.function.__name__, arguments)

async def prompt_with_tools(conversation: Conversation, available_tools: list[Tool]):
    """Ask the model for a reply, running any tools it requests first.

    The model is called with the conversation so far and the wrapped tool
    functions. If it asks for tool calls, each request whose name matches an
    available tool is executed (its result is appended to `conversation`), and
    the model is called again without tools. Requests naming an unknown tool
    are skipped.

    Args:
        conversation: The conversation to read messages from and append to.
        available_tools: Tools the model may call.

    Returns:
        The content of the final reply, which is also appended to
        `conversation` as an assistant message.
    """
    callables = [tool.function for tool in available_tools]
    response = ollama_client.chat(
        model=ollama_model, messages=conversation.state, tools=callables
    )

    requested = response.message.tool_calls
    if requested:
        for request in requested:
            # The first available tool with a matching function name wins.
            match = None
            for candidate in available_tools:
                if candidate.function.__name__ == request.function.name:
                    match = candidate
                    break
            if match:
                await match.call(request.function.arguments, conversation)
        # Ask again now that the tool results are part of the conversation.
        response = ollama_client.chat(model=ollama_model, messages=conversation.state)

    conversation.assistant(response.message.content)
    return response.message.content

async def tools_demo():
    """Ask a letter-counting question with the counting tool available.

    Displays the resulting message list once the model has answered.
    """
    toolbox = [Tool(how_many_letters)]
    conversation = Conversation("Be brief. Use tools if required.")
    conversation.user("How many 'r's are there in the word 'strawberry'?")

    await prompt_with_tools(conversation, toolbox)
    display(conversation.state)

# Run the tool-calling demo (top-level `await` works in an IPython/Jupyter
# cell only).
await tools_demo()

In [None]:
import python_weather

async def get_weather(location: str):
    """
    Get the weather report for a given location.

    Args:
        location (str): The name of a location.
    Returns:
        An object representing the current temperature (in Celsius) and the kind of weather in effect.
    """
    weather_client = python_weather.Client()
    try:
        forecast = await weather_client.get(location)
    finally:
        # Always release the client, including when the lookup raises.
        await weather_client.close()
    return {
        "temperature": forecast.temperature,
        # Stringified so the result can be JSON-encoded.
        "kind": str(forecast.kind),
    }

async def verbal_tools_demo():
    """Take a spoken question, answer it with the weather tool available, and speak the reply.

    Asks `record_for_duration` for a 5-second clip, transcribes it as the user
    message, then displays players for the recording and the spoken answer,
    followed by the full message list.
    """
    microphone = Audio()
    conversation = Conversation('Be brief. Use tools if required.')

    clip = await record_for_duration(microphone, 5)
    display(DisplayAudio(data=clip, rate=microphone.RATE))
    question = transcribe(clip)
    conversation.user(question)

    toolbox = [Tool(get_weather)]
    answer = await prompt_with_tools(conversation, toolbox)
    spoken = verbalise(answer)
    display(DisplayAudio(data=spoken, rate=piper_voice.config.sample_rate))
    display(conversation.state)

# Run the spoken question/answer demo (top-level `await` works in an
# IPython/Jupyter cell only).
await verbal_tools_demo()

In [None]:
from ipywidgets.widgets import ToggleButton

async def interactive_conversation_button_pressed(target: dict, conversation, audio, stop, button):
    """Handle one toggle of the talk button.

    Toggling on starts a recording that runs until `stop` is set, then
    transcribes it, asks the model (with tools) and displays/speaks the reply.
    Toggling off sets `stop`, which ends the recording started by the
    preceding "on" toggle.

    Args:
        target: Change notification; `target["new"]` is the button's new value.
        conversation: Conversation that accumulates the exchange.
        audio: Recorder exposing async `record(stop)` and a `RATE` attribute.
        stop: Event shared between the "on" and "off" invocations.
        button: The toggle button whose label and enabled state are updated.
    """
    if not target["new"]:  # button toggled "off": end the running recording
        stop.set()
        return

    # Button toggled "on".
    button.description = "Listening..."
    stop.clear()
    try:
        recorded = await audio.record(stop)
        button.description = "Thinking..."
        # Block further toggles while the reply is being produced.
        button.disabled = True
        display(DisplayAudio(data=recorded, rate=audio.RATE))
        transcribed = transcribe(recorded)
        display("user: %s" % transcribed)
        conversation.user(transcribed)
        available_tools = [Tool(get_weather), Tool(how_many_letters)]
        response = await prompt_with_tools(conversation, available_tools)
        verbalised = verbalise(response)
        display(DisplayAudio(data=verbalised, rate=piper_voice.config.sample_rate))
        display("assistant: %s" % response)
    finally:
        # Restore the button even when a step above fails; otherwise it would
        # stay disabled on "Thinking..." for good.
        button.disabled = False
        button.description = "Ready"

def interactive_conversation_demo():
    """Display a toggle button that drives a spoken conversation.

    Every change of the button's value schedules
    `interactive_conversation_button_pressed` on the shared event loop with
    the same conversation, recorder and stop event.
    """
    conversation = Conversation()
    stop = asyncio.Event()
    audio = Audio()
    button = ToggleButton(value=False, description="Ready")
    # The event loop keeps only weak references to tasks, so hold strong ones
    # here until each task finishes; otherwise a running handler could be
    # garbage-collected mid-execution.
    pending_tasks = set()

    def on_toggle(change):
        task = event_loop.create_task(
            interactive_conversation_button_pressed(change, conversation, audio, stop, button)
        )
        pending_tasks.add(task)
        task.add_done_callback(pending_tasks.discard)

    button.observe(on_toggle, "value")
    display(button)

# Show the talk button; toggle it on to speak and off to get the reply.
interactive_conversation_demo()