diff --git a/audio_utils.py b/audio_utils.py
new file mode 100644
index 0000000..55cf29d
--- /dev/null
+++ b/audio_utils.py
@@ -0,0 +1,61 @@
+# audio_utils.py
+
+import csv
+import math
+import os
+import tempfile
+
+from pydub import AudioSegment
+from tqdm import tqdm
+
+
+class AudioSplit:
+    """Split an audio file into small 16 kHz WAV chunks that speech models can consume."""
+
+    def __init__(self, filename, title="Output"):
+        """Load the source audio and prepare the temporary output folder."""
+        self.folder = tempfile.TemporaryDirectory()
+        self.title = os.path.splitext(title)[0]
+        os.mkdir(self.folder.name + '/audio/')
+        if filename.endswith('.mp3'):
+            another = tempfile.TemporaryDirectory()
+            sound = AudioSegment.from_mp3(filename)
+            sound = sound.set_frame_rate(16000)
+            sound.export(another.name + '/original.wav', format="wav")
+            self.audio = AudioSegment.from_wav(another.name + '/original.wav')
+        elif filename.endswith('.wav'):
+            self.audio = AudioSegment.from_wav(filename)
+        else:
+            raise ValueError("Unsupported audio format: " + filename)
+        self.metadata = 'metadata.csv'
+
+    def get_folder(self):
+        return self.folder.name
+
+    def get_duration(self):
+        return self.audio.duration_seconds
+
+    def single_split(self, from_sec, to_sec, split_filename):
+        # pydub slices audio by milliseconds.
+        time1 = from_sec * 1000
+        time2 = to_sec * 1000
+        split_audio = self.audio[time1:time2]
+        split_audio.export(self.folder.name + '/audio/' + split_filename, format="wav")
+
+    def multiple_split(self, sec_per_split):
+        with open(self.folder.name + '/audio/' + self.metadata, 'w', encoding='utf8') as csv_file:
+            writer = csv.writer(csv_file)
+            writer.writerow(['id', 'file_name', 'transcription'])
+            total_splits = math.ceil(self.get_duration() / sec_per_split)
+            for i in tqdm(range(total_splits)):
+                split_fn = self.title + '_' + str(i) + '.wav'
+                self.single_split(i * sec_per_split, (i + 1) * sec_per_split, split_fn)
+                writer.writerow([i, split_fn, "Export wave file " + str(i)])
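+
+
+if __name__ == "__main__":
+    # Minimal usage sketch (illustrative addition; "input.mp3" is a placeholder
+    # path, not a file shipped with this change).
+    splitter = AudioSplit("input.mp3", title="demo")
+    splitter.multiple_split(sec_per_split=10)
+    print("Chunks and metadata.csv written to", splitter.get_folder() + "/audio/")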
diff --git a/data/audio/i_dont_get.wav b/data/audio/i_dont_get.wav
new file mode 100644
index 0000000..c149e7e
Binary files /dev/null and b/data/audio/i_dont_get.wav differ
diff --git a/data/audio/suse_intro.wav b/data/audio/suse_intro.wav
new file mode 100644
index 0000000..0744cf7
Binary files /dev/null and b/data/audio/suse_intro.wav differ
diff --git a/demo_assistant.py b/demo_assistant.py
new file mode 100644
index 0000000..a04f83c
--- /dev/null
+++ b/demo_assistant.py
@@ -0,0 +1,168 @@
+import tempfile
+import pyaudio
+from pydub import AudioSegment
+import wave
+import re
+import time
+
+import queue
+import threading
+from faster_whisper import WhisperModel
+import torch
+from TTS.api import TTS
+from gpt4all import GPT4All
+
+
+CHUNK = 1024
+FORMAT = pyaudio.paInt16  # 16-bit resolution
+CHANNELS = 1
+RATE = 16000  # sample rate
+DURATION = 3  # seconds recorded per transcription window while idle
+Q_DURATION = 7  # longer window while a question is being captured
+SUSE = r"s+u+s+e"  # loosely match "suse"-like transcriptions of the wake word
+THANK = r"Thank\s*(?:you|u)\b"
+
+
+g_active = False
+g_wait = False
+g_lock = threading.Lock()
+counter = 0
+
+p_audio = pyaudio.PyAudio()
+playback_stream = p_audio.open(format=FORMAT, channels=CHANNELS, rate=24000, output=True)
+record_stream = p_audio.open(format=FORMAT, channels=CHANNELS, rate=RATE, input=True, frames_per_buffer=CHUNK)
+
+
+def record_audio():
+    global g_active
+    global g_wait
+    print("Recording started...")
+    while True:
+        if g_active:
+            print("c", end="")
+        if g_wait:
+            # Playback is running; drop stale audio so we don't transcribe ourselves.
+            print("w", end="")
+            while not audio_queue.empty():
+                audio_queue.get()
+            time.sleep(1)
+            continue
+        audio_data = record_stream.read(CHUNK, exception_on_overflow=False)
+        audio_queue.put(audio_data)
+        print(".", end="")
+
+    # Only reached if the loop above is given an exit condition.
+    record_stream.stop_stream()
+    record_stream.close()
+    p_audio.terminate()
+
+
+def speech_to_text():
+    global g_active
+    global g_wait
+    global counter
+    while True:
+        if g_active:
+            time_duration = Q_DURATION
+        else:
+            time_duration = DURATION
+        tf = tempfile.NamedTemporaryFile(suffix=".wav", delete=True, mode='wb')
+        mp3_tf = tempfile.NamedTemporaryFile(suffix=".mp3", delete=True, mode='wb')
+        with wave.open(tf.name, 'wb') as wav_file:
+            wav_file.setnchannels(CHANNELS)
+            wav_file.setsampwidth(p_audio.get_sample_size(FORMAT))
+            wav_file.setframerate(RATE)
+            # Read audio data from the stream for the specified duration
+            for i in range(0, RATE // CHUNK * time_duration):
+                print("r", end="")
+                audio_data = audio_queue.get()
+                wav_file.writeframes(audio_data)
+                audio_queue.task_done()
+        print(f"{time_duration} sec recording done.")
+        # Perform speech recognition
+        audio = AudioSegment.from_wav(tf.name)
+        audio.export(mp3_tf.name, format="mp3")
+        # segments, info = model.transcribe(mp3_tf.name, beam_size=5)
+        segments, _ = model.transcribe(mp3_tf.name)
+        questions = []
+        if g_active:
+            counter += 1
+            for segment in segments:
+                print("[%.2fs -> %.2fs] %s" % (segment.start, segment.end, segment.text))
+                if segment.text:
+                    questions.append(segment.text)
+            q = re.sub(THANK, "", " ".join(questions))
+            print(f"Question: {q} counter={counter}")
+            if len(q) > 40 and counter >= 2:
+                counter = 0
+                output = gpt_model.generate(" ".join(questions), max_tokens=50)
+                print(f"Answer: {output}")
+                reply_wav = tempfile.NamedTemporaryFile(suffix=".wav", delete=True, mode='wb')
+                tts.tts_to_file(text=output, file_path=reply_wav.name)
+                play(playback_stream, reply_wav.name)
+                with g_lock:
+                    g_active = False
+                time.sleep(5)
+            continue
+        for segment in segments:
+            print("[%.2fs -> %.2fs] %s" % (segment.start, segment.end, segment.text))
+            text_input = segment.text.lower()
+            if text_input.find("hey") != -1:
+                if re.search(SUSE, text_input):
+                    counter = 1
+                    with g_lock:
+                        g_active = True
+                        g_wait = True
+                    play(playback_stream, "data/audio/suse_intro.wav")
+                    print("Finish suse")
+                    with g_lock:
+                        g_wait = False
+                    time.sleep(5)
+
+
+def play(play_stream, filename):
+    wave_file = wave.open(filename, 'rb')
+    print(f"Wave: rate={wave_file.getframerate()} channels={wave_file.getnchannels()} width={wave_file.getsampwidth()}")
+    out_data = wave_file.readframes(CHUNK)
+    while out_data:
+        play_stream.write(out_data)
+        out_data = wave_file.readframes(CHUNK)
+
+
+# Get device
+device = "cuda" if torch.cuda.is_available() else "cpu"
+
+# Init TTS
+# tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)
+tts = TTS("tts_models/en/blizzard2013/capacitron-t2-c150_v2").to(device)
+gpt_model = GPT4All("orca-mini-3b-gguf2-q4_0.gguf")
+# Create a queue to share audio data between threads
+audio_queue = queue.Queue()
+# model_size = "large-v2"
+model_size = "small.en"
+# model_size = "tiny.en"
+
+# Transcribe on CPU with int8 quantization
+model = WhisperModel(model_size, device="cpu", compute_type="int8")
+
+
+print(f"Stream: playback->{playback_stream.get_write_available()}")
+# Create and start the recording thread
+recording_thread = threading.Thread(target=record_audio)
+recording_thread.start()
+
+# Create and start the speech-to-text thread
+speech_to_text_thread = threading.Thread(target=speech_to_text)
+speech_to_text_thread.start()
+
+# Wait for the recording thread to finish (you can define conditions to stop the recording)
+recording_thread.join()
+
+# Stop the speech-to-text thread
+speech_to_text_thread.join()
+
+p_audio.terminate()
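+
+# Usage note (illustrative addition): run `python demo_assistant.py`, say
+# "hey SUSE" to wake the assistant, then ask a question; the answer is
+# generated with GPT4All and spoken back through Coqui TTS.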
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..7e3995a
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,10 @@
+pyaudio
+pydub
+transformers
+datasets
+faster_whisper
+torch
+TTS
+gpt4all
+tqdm
+pyttsx3
diff --git a/setup.sh b/setup.sh
new file mode 100644
index 0000000..b579072
--- /dev/null
+++ b/setup.sh
@@ -0,0 +1,5 @@
+#!/bin/sh
+sudo zypper install python311-devel portaudio-devel espeak-ng
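+# Suggested follow-up (not part of the original script): install the Python deps.
+#   python3.11 -m venv .venv && . .venv/bin/activate
+#   pip install -r requirements.txt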
diff --git a/test/coqui_tts_out.py b/test/coqui_tts_out.py
new file mode 100644
index 0000000..47b61ac
--- /dev/null
+++ b/test/coqui_tts_out.py
@@ -0,0 +1,15 @@
+import torch
+from TTS.api import TTS
+
+# Get device
+device = "cuda" if torch.cuda.is_available() else "cpu"
+
+# Init TTS
+# tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)
+tts = TTS("tts_models/en/blizzard2013/capacitron-t2-c150_v2").to(device)
+# tts.tts_to_file(text="Hello, I am your SUSE ... assistant. What can I do for you today?", file_path="/tmp/out.wav")
+tts.tts_to_file(text="Sorry, I didn't get your question. Please say that again.", file_path="/tmp/out.wav")
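+# To audition the result (illustrative sketch; pydub playback needs simpleaudio or ffplay):
+# from pydub import AudioSegment
+# from pydub.playback import play
+# play(AudioSegment.from_wav("/tmp/out.wav"))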
diff --git a/test/fast_whisper.py b/test/fast_whisper.py
new file mode 100644
index 0000000..15ce4f5
--- /dev/null
+++ b/test/fast_whisper.py
@@ -0,0 +1,113 @@
+import tempfile
+import pyaudio
+from pydub import AudioSegment
+import wave
+import re
+import queue
+from faster_whisper import WhisperModel
+import torch
+from TTS.api import TTS
+
+CHUNK = 1024
+FORMAT = pyaudio.paInt16  # 16-bit resolution
+CHANNELS = 1
+RATE = 16000  # sample rate
+DURATION = 2
+SUSE = r"s+u+s+e"  # loosely match "suse"-like transcriptions of the wake word
+
+p = pyaudio.PyAudio()
+
+
+def record(stream):
+    # Unused helper kept for reference.
+    print("Recording started...")
+    while True:
+        audio_data = stream.read(CHUNK)
+        audio_queue.put(audio_data)
+
+    # Only reached if the loop above is given an exit condition.
+    stream.stop_stream()
+    stream.close()
+    p.terminate()
+
+
+def play(play_stream, filename):
+    wave_file = wave.open(filename, 'rb')
+    print(f"Wave: rate={wave_file.getframerate()} channels={wave_file.getnchannels()} width={wave_file.getsampwidth()}")
+    out_data = wave_file.readframes(CHUNK)
+    while out_data:
+        play_stream.write(out_data)
+        out_data = wave_file.readframes(CHUNK)
+
+
+# Get device
+device = "cuda" if torch.cuda.is_available() else "cpu"
+
+# Init TTS
+# tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)
+tts = TTS("tts_models/en/blizzard2013/capacitron-t2-c150_v2").to(device)
+# Create a queue to share audio data between threads
+audio_queue = queue.Queue()
+# model_size = "large-v2"
+model_size = "small.en"
+# model_size = "tiny.en"
+
+# Transcribe on CPU with int8 quantization
+model = WhisperModel(model_size, device="cpu", compute_type="int8")
+
+dev_info = ""
+for i in range(p.get_device_count()):
+    dev_info = p.get_device_info_by_index(i)
+    if dev_info['name'] == "default":
+        print(dev_info)
+        break
+
+
+playback_stream = p.open(format=p.get_format_from_width(2),
+                         channels=1,
+                         rate=24000,
+                         # output_device_index = dev_info['index'],
+                         output=True)
+
+print(f"Stream: playback->{playback_stream.get_write_available()}")
+# generator = pipeline(task="automatic-speech-recognition", model="microsoft/speecht5_asr")
+
+while True:
+    tf = tempfile.NamedTemporaryFile(suffix=".wav", delete=True, mode='wb')
+    mp3_tf = tempfile.NamedTemporaryFile(suffix=".mp3", delete=True, mode='wb')
+    temp_filename = tf.name
+    mp3_tf_filename = mp3_tf.name
+    with wave.open(temp_filename, 'wb') as wav_file:
+        wav_file.setnchannels(CHANNELS)
+        wav_file.setsampwidth(p.get_sample_size(FORMAT))
+        wav_file.setframerate(RATE)
+        stream = p.open(format=FORMAT, channels=CHANNELS, rate=RATE, input=True, frames_per_buffer=CHUNK)
+        print("Listening...")
+        frames = []
+        for i in range(0, RATE // CHUNK * DURATION):
+            # Read audio data from the stream for the specified duration
+            audio_data = stream.read(CHUNK)
+            frames.append(audio_data)
+            wav_file.writeframes(audio_data)
+        # print(f"{DURATION} sec recording done.")
+        stream.close()
+
+    audio = AudioSegment.from_wav(temp_filename)
+    audio.export(mp3_tf_filename, format="mp3")
+    # segments, info = model.transcribe(mp3_tf_filename, beam_size=5)
+    # print("Detected language '%s' with probability %f" % (info.language, info.language_probability))
+    segments, _ = model.transcribe(mp3_tf_filename)
+    for segment in segments:
+        print("[%.2fs -> %.2fs] %s" % (segment.start, segment.end, segment.text))
+        # out_wav = tempfile.NamedTemporaryFile(suffix=".mp3", delete=True, mode='wb')
+        text_input = segment.text.lower()
+        if text_input.find("hey") != -1:
+            if re.search(SUSE, text_input):
+                # wav = tts.tts(text=segment.text, speaker_wav=speak_wav.name, language="en")
+                # tts.tts_to_file(text="This is SUSE assistant what can I do for you today?", language="en", file_path=out_wav.name)
+                # tts.tts_to_file(text="This is SUSE assistant what can I do for you today?", file_path=out_wav.name)
+                # wave_file = wave.open(out_wav.name, 'rb')
+                play(playback_stream, "data/audio/suse_intro.wav")
+
+# Only reached if the loop above exits.
+p.terminate()
diff --git a/test/fast_whisper2.py b/test/fast_whisper2.py
new file mode 100644
index 0000000..2dd95be
--- /dev/null
+++ b/test/fast_whisper2.py
@@ -0,0 +1,159 @@
+import tempfile
+import pyaudio
+from pydub import AudioSegment
+import wave
+import re
+import time
+
+import queue
+import threading
+from faster_whisper import WhisperModel
+import torch
+from TTS.api import TTS
+from gpt4all import GPT4All
+
+
+CHUNK = 1024
+FORMAT = pyaudio.paInt16  # 16-bit resolution
+CHANNELS = 1
+RATE = 16000  # sample rate
+DURATION = 5
+SUSE = r"s+u+s+e"  # loosely match "suse"-like transcriptions of the wake word
+THANK = r"Thank\s*(?:you|u)\b"
+
+
+g_active = False
+g_wait = False
+g_lock = threading.Lock()
+counter = 0
+
+p_audio = pyaudio.PyAudio()
+playback_stream = p_audio.open(format=FORMAT, channels=CHANNELS, rate=24000, output=True)
+record_stream = p_audio.open(format=FORMAT, channels=CHANNELS, rate=RATE, input=True, frames_per_buffer=CHUNK)
+
+
+def record_audio():
+    global g_active
+    global g_wait
+    print("Recording started...")
+    while True:
+        if g_active:
+            print("c", end="")
+        if g_wait:
+            # Playback is running; drop stale audio so we don't transcribe ourselves.
+            print("w", end="")
+            while not audio_queue.empty():
+                audio_queue.get()
+            time.sleep(1)
+            continue
+        audio_data = record_stream.read(CHUNK)
+        audio_queue.put(audio_data)
+        print(".", end="")
+
+    # Only reached if the loop above is given an exit condition.
+    record_stream.stop_stream()
+    record_stream.close()
+    p_audio.terminate()
+
+
+def speech_to_text():
+    global g_active
+    global g_wait
+    global counter
+    while True:
+        tf = tempfile.NamedTemporaryFile(suffix=".wav", delete=True, mode='wb')
+        mp3_tf = tempfile.NamedTemporaryFile(suffix=".mp3", delete=True, mode='wb')
+        with wave.open(tf.name, 'wb') as wav_file:
+            wav_file.setnchannels(CHANNELS)
+            wav_file.setsampwidth(p_audio.get_sample_size(FORMAT))
+            wav_file.setframerate(RATE)
+            # Read audio data from the stream for the specified duration
+            for i in range(0, RATE // CHUNK * DURATION):
+                print("r", end="")
+                audio_data = audio_queue.get()
+                wav_file.writeframes(audio_data)
+                audio_queue.task_done()
+        print(f"{DURATION} sec recording done.")
+        # Perform speech recognition
+        audio = AudioSegment.from_wav(tf.name)
+        audio.export(mp3_tf.name, format="mp3")
+        # segments, info = model.transcribe(mp3_tf.name, beam_size=5)
+        segments, _ = model.transcribe(mp3_tf.name)
+        questions = []
+        if g_active:
+            counter += 1
+            for segment in segments:
+                print("[%.2fs -> %.2fs] %s" % (segment.start, segment.end, segment.text))
+                if segment.text:
+                    questions.append(segment.text)
+            q = re.sub(THANK, "", " ".join(questions))
+            print(f"Question: {q} counter={counter}")
+            if len(q) > 40 and counter > 3:
+                counter = 0
+                output = gpt_model.generate(" ".join(questions), max_tokens=50)
+                print(f"Answer: {output}")
+                reply_wav = tempfile.NamedTemporaryFile(suffix=".wav", delete=True, mode='wb')
+                tts.tts_to_file(text=output, file_path=reply_wav.name)
+                play(playback_stream, reply_wav.name)
+                with g_lock:
+                    g_active = False
+                time.sleep(5)
+            continue
+        for segment in segments:
+            print("[%.2fs -> %.2fs] %s" % (segment.start, segment.end, segment.text))
+            text_input = segment.text.lower()
+            if text_input.find("hey") != -1:
+                if re.search(SUSE, text_input):
+                    counter = 1
+                    with g_lock:
+                        g_active = True
+                        g_wait = True
+                    play(playback_stream, "data/audio/suse_intro.wav")
+                    print("Finish suse")
+                    with g_lock:
+                        g_wait = False
+                    time.sleep(5)
+
+
+def play(play_stream, filename):
+    wave_file = wave.open(filename, 'rb')
+    print(f"Wave: rate={wave_file.getframerate()} channels={wave_file.getnchannels()} width={wave_file.getsampwidth()}")
+    out_data = wave_file.readframes(CHUNK)
+    while out_data:
+        play_stream.write(out_data)
+        out_data = wave_file.readframes(CHUNK)
+
+
+# Get device
+device = "cuda" if torch.cuda.is_available() else "cpu"
+
+# Init TTS
+# tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)
+tts = TTS("tts_models/en/blizzard2013/capacitron-t2-c150_v2").to(device)
+gpt_model = GPT4All("orca-mini-3b-gguf2-q4_0.gguf")
+# Create a queue to share audio data between threads
+audio_queue = queue.Queue()
+# model_size = "large-v2"
+model_size = "small.en"
+# model_size = "tiny.en"
+
+# Transcribe on CPU with int8 quantization
+model = WhisperModel(model_size, device="cpu", compute_type="int8")
+
+
+print(f"Stream: playback->{playback_stream.get_write_available()}")
+# Create and start the recording thread
+recording_thread = threading.Thread(target=record_audio)
+recording_thread.start()
+
+# Create and start the speech-to-text thread
+speech_to_text_thread = threading.Thread(target=speech_to_text)
+speech_to_text_thread.start()
+
+# Wait for the recording thread to finish (you can define conditions to stop the recording)
+recording_thread.join()
+
+# Stop the speech-to-text thread
+speech_to_text_thread.join()
+
+p_audio.terminate()
diff --git a/test/pyttsx_test.py b/test/pyttsx_test.py
new file mode 100644
index 0000000..1f8f732
--- /dev/null
+++ b/test/pyttsx_test.py
@@ -0,0 +1,19 @@
+import pyttsx3
+
+def text_to_speech(text):
+    # Initialize the TTS engine
+    engine = pyttsx3.init()
+
+    # Set properties (optional)
+    engine.setProperty('rate', 130)  # Speed of speech
+    engine.setProperty('volume', 0.6)  # Volume level (0.0 to 1.0)
+
+    # Convert text to speech
+    engine.say(text)
+
+    # Wait for the speech to finish
+    engine.runAndWait()
+
+# Example usage
+text = "Hello, this is a simple text-to-speech example in Python."
+text_to_speech(text)
diff --git a/test/test_audio.py b/test/test_audio.py
new file mode 100644
index 0000000..e780e5f
--- /dev/null
+++ b/test/test_audio.py
@@ -0,0 +1,34 @@
+import pyaudio
+import wave
+
+filename = '/tmp/out.wav'
+
+# Set chunk size of 1024 samples per data frame
+chunk = 1024
+
+# Open the sound file
+wf = wave.open(filename, 'rb')
+
+# Create an interface to PortAudio
+p = pyaudio.PyAudio()
+
+# Open a .Stream object to write the WAV file to
+# 'output = True' indicates that the sound will be played rather than recorded
+stream = p.open(format=p.get_format_from_width(wf.getsampwidth()),
+                channels=wf.getnchannels(),
+                rate=wf.getframerate(),
+                output=True)
+
+# Read data in chunks
+data = wf.readframes(chunk)
+
+# Play the sound by writing the audio data to the stream.
+# readframes() returns bytes, so test truthiness rather than comparing to ''.
+while data:
+    stream.write(data)
+    data = wf.readframes(chunk)
+
+# Close and terminate the stream
+stream.stop_stream()
+stream.close()
+wf.close()
+p.terminate()