From f14dea891c6d6327beb058546485c2367ecd01f0 Mon Sep 17 00:00:00 2001 From: tigerpaws Date: Tue, 21 Nov 2023 16:38:34 +0800 Subject: [PATCH 01/14] feat(AndroidManifest): Allowed clear text traffic (HTTP). Refs: - https://blog.csdn.net/gengkui9897/article/details/82863966 - https://stackoverflow.com/questions/45940861/android-8-cleartext-http-traffic-not-permitted --- android/app/src/main/AndroidManifest.xml | 1 + 1 file changed, 1 insertion(+) diff --git a/android/app/src/main/AndroidManifest.xml b/android/app/src/main/AndroidManifest.xml index 0924a66..a97254c 100644 --- a/android/app/src/main/AndroidManifest.xml +++ b/android/app/src/main/AndroidManifest.xml @@ -12,6 +12,7 @@ android:roundIcon="@mipmap/ic_launcher_round" android:supportsRtl="true" android:theme="@style/Theme.WhisperToInput" + android:usesCleartextTraffic="true" tools:targetApi="31"> Date: Tue, 21 Nov 2023 16:45:56 +0800 Subject: [PATCH 02/14] feat(.WhisperTranscriber): Connected with self-hosted inference server. Refs: - Studying of request bodies of the self-hosted server. - https://platform.openai.com/docs/guides/speech-to-text - https://zh.wikipedia.org/wiki/%E4%BA%92%E8%81%94%E7%BD%91%E5%AA%92%E4%BD%93%E7%B1%BB%E5%9E%8B (audio/mp4 not listed in the English page) - https://stackoverflow.com/questions/24279563/uploading-a-large-file-in-multipart-using-okhttp - https://blog.csdn.net/XuWei1213/article/details/79693340 --- android/app/build.gradle.kts | 1 - .../whispertoinput/WhisperTranscriber.kt | 53 ++++++++++--------- 2 files changed, 28 insertions(+), 26 deletions(-) diff --git a/android/app/build.gradle.kts b/android/app/build.gradle.kts index 0c81a39..539e668 100644 --- a/android/app/build.gradle.kts +++ b/android/app/build.gradle.kts @@ -36,7 +36,6 @@ android { } dependencies { - implementation("com.aallam.openai:openai-client:3.5.1") implementation("io.ktor:ktor-client-okhttp:2.3.6") implementation("androidx.core:core-ktx:1.9.0") implementation("androidx.appcompat:appcompat:1.6.1") diff --git a/android/app/src/main/java/com/example/whispertoinput/WhisperTranscriber.kt b/android/app/src/main/java/com/example/whispertoinput/WhisperTranscriber.kt index 5316d61..556ee64 100644 --- a/android/app/src/main/java/com/example/whispertoinput/WhisperTranscriber.kt +++ b/android/app/src/main/java/com/example/whispertoinput/WhisperTranscriber.kt @@ -1,16 +1,14 @@ package com.example.whispertoinput import android.content.Context -import android.util.Log -import com.aallam.openai.api.audio.TranscriptionRequest -import com.aallam.openai.api.file.FileSource -import com.aallam.openai.api.model.ModelId -import com.aallam.openai.client.OpenAI import kotlinx.coroutines.* -import kotlinx.coroutines.flow.first -import kotlinx.coroutines.flow.map -import okio.FileSystem -import okio.Path.Companion.toPath +import okhttp3.MediaType.Companion.toMediaTypeOrNull +import okhttp3.MultipartBody +import okhttp3.OkHttpClient +import okhttp3.Request +import okhttp3.RequestBody +import okhttp3.RequestBody.Companion.asRequestBody +import java.io.File class WhisperTranscriber { private var currentTranscriptionJob: Job? = null @@ -22,23 +20,14 @@ class WhisperTranscriber { exceptionCallback: (String) -> Unit ) { suspend fun makeWhisperRequest(): String { - val apiKey = context.dataStore.data.map { preferences -> - preferences[API_KEY] - }.first() - val openai = OpenAI( - token = apiKey ?: "", + val client = OkHttpClient() + val request = buildWhisperRequest( + filename, + "http://192.168.1.110:9000/asr?encode=true&task=transcribe&language=zh&word_timestamps=false&output=txt", + "audio/mp4" ) - val request = TranscriptionRequest( - audio = FileSource( - name = filename, - source = FileSystem.SYSTEM.source(filename.toPath()) - ), - model = ModelId("whisper-1"), - language = "zh" - ) - val transcription = openai.transcription(request) - - return transcription.text + val response = client.newCall(request).execute() + return response.body!!.string() } // Create a cancellable job in the main thread (for UI updating) @@ -79,4 +68,18 @@ class WhisperTranscriber { currentTranscriptionJob?.cancel() currentTranscriptionJob = job } + + private fun buildWhisperRequest(filename: String, url: String, mediaType: String): Request { + val file: File = File(filename) + val fileBody: RequestBody = file.asRequestBody(mediaType.toMediaTypeOrNull()) + val requestBody: RequestBody = MultipartBody.Builder() + .setType(MultipartBody.FORM) + .addFormDataPart("audio_file", "@audio.m4a", fileBody) + .build() + + return Request.Builder() + .url(url) + .post(requestBody) + .build() + } } From 16a0fbc894d8fa55253e46abc53b936fb2dcb0d3 Mon Sep 17 00:00:00 2001 From: tigerpaws Date: Tue, 21 Nov 2023 17:41:41 +0800 Subject: [PATCH 03/14] feat(activity_main.xml): Added fields in UI for endpoint and language code configuration. --- .../app/src/main/res/layout/activity_main.xml | 96 +++++++++++++------ 1 file changed, 68 insertions(+), 28 deletions(-) diff --git a/android/app/src/main/res/layout/activity_main.xml b/android/app/src/main/res/layout/activity_main.xml index 6527c4b..36c76ac 100644 --- a/android/app/src/main/res/layout/activity_main.xml +++ b/android/app/src/main/res/layout/activity_main.xml @@ -18,64 +18,104 @@ app:layout_constraintStart_toStartOf="parent" app:layout_constraintTop_toTopOf="parent" /> + +