Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Compatibility with OpenAI API and Whisper Webservice. #13

Merged
merged 14 commits into from
Nov 26, 2023
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion android/app/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@ android {
}

dependencies {
implementation("com.aallam.openai:openai-client:3.5.1")
implementation("io.ktor:ktor-client-okhttp:2.3.6")
implementation("androidx.core:core-ktx:1.9.0")
implementation("androidx.appcompat:appcompat:1.6.1")
Expand Down
1 change: 1 addition & 0 deletions android/app/src/main/AndroidManifest.xml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
android:roundIcon="@mipmap/ic_launcher_round"
android:supportsRtl="true"
android:theme="@style/Theme.WhisperToInput"
android:usesCleartextTraffic="true"
tools:targetApi="31">
<service android:name=".WhisperInputService"
android:label="Whisper Input"
Expand Down
101 changes: 86 additions & 15 deletions android/app/src/main/java/com/example/whispertoinput/MainActivity.kt
Original file line number Diff line number Diff line change
Expand Up @@ -12,30 +12,34 @@ import android.provider.*
import android.view.View
import android.widget.Button
import android.widget.EditText
import android.widget.RadioGroup
import android.widget.Toast
import androidx.core.content.ContextCompat
import androidx.datastore.core.DataStore
import androidx.datastore.preferences.core.Preferences
import androidx.datastore.preferences.core.booleanPreferencesKey
import androidx.datastore.preferences.core.edit
import androidx.datastore.preferences.core.stringPreferencesKey
import androidx.datastore.preferences.preferencesDataStore
import kotlinx.coroutines.CoroutineScope
import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.delay
import kotlinx.coroutines.flow.first
import kotlinx.coroutines.flow.map
import kotlinx.coroutines.launch
import kotlinx.coroutines.withContext

private const val MICROPHONE_PERMISSION_REQUEST_CODE = 200
val Context.dataStore: DataStore<Preferences> by preferencesDataStore(name = "settings")
val ENDPOINT = stringPreferencesKey("endpoint")
val LANGUAGE_CODE = stringPreferencesKey("language-code")
val REQUEST_STYLE = booleanPreferencesKey("is-openai-api-request-style")
val API_KEY = stringPreferencesKey("api-key")

class MainActivity : AppCompatActivity() {
override fun onCreate(savedInstanceState: Bundle?) {
super.onCreate(savedInstanceState)
setContentView(R.layout.activity_main)
setupApiKeyWidgets(this)
setupConfigWidgets(this)
checkPermissions()
}

Expand All @@ -54,53 +58,120 @@ class MainActivity : AppCompatActivity() {
startActivity(intent)
}

// Sets up API Key-related widgets.
private fun setupApiKeyWidgets(context: Context) {
// Sets up config widgets.
private fun setupConfigWidgets(context: Context) {
// TODO: Refactor. Perhaps use a class to process configuration UI widgets and behaviors.
// Launches a non-blocking job in the main thread.
// Perform data retrieval in the IO thread.
val endpointInput: EditText = findViewById(R.id.edittext_endpoint)
val btnSetEndpoint: Button = findViewById(R.id.btn_set_endpoint)
val languageCodeInput: EditText = findViewById(R.id.edittext_language_code)
val btnSetLanguageCode: Button = findViewById(R.id.btn_set_language_code)
val apiKeyInput: EditText = findViewById(R.id.edittext_api_key)
val btnSetApiKey: Button = findViewById(R.id.btn_set_api_key)
val requestStyleOption : RadioGroup = findViewById(R.id.radio_request_style)

CoroutineScope(Dispatchers.Main).launch {

// Disable input & button, and show loading hint
apiKeyInput.isEnabled = false
// Disable inputs, buttons & controls, and show loading hint
endpointInput.isEnabled = false
endpointInput.hint = getString(R.string.loading)
btnSetEndpoint.isEnabled = false
languageCodeInput.isEnabled = false
languageCodeInput.hint = getString(R.string.loading)
btnSetLanguageCode.isEnabled = false
apiKeyInput.hint = getString(R.string.loading)
btnSetApiKey.isEnabled = false
requestStyleOption.isEnabled = false

// Retrieve stored endpoint, language code, api key & request style
val retrievedEndpoint = withContext(Dispatchers.IO) {
return@withContext dataStore.data.map { preferences ->
preferences[ENDPOINT]
}.first()
}

val retrievedLanguageCode = withContext(Dispatchers.IO) {
return@withContext dataStore.data.map { preferences ->
preferences[LANGUAGE_CODE]
}.first()
}

val retrievedRequestStyle = withContext(Dispatchers.IO) {
return@withContext dataStore.data.map { preferences ->
preferences[REQUEST_STYLE]
}.first()
}

// Retrieve Api Key
val retrievedApiKey = withContext(Dispatchers.IO) {
return@withContext dataStore.data.map { preferences ->
preferences[API_KEY]
}.first()
}

// Set retrieved api key in input, or set "Enter API Key" hint
// Set retrieved endpoint in input, or set hint
if (retrievedEndpoint.isNullOrEmpty()) {
endpointInput.hint = getString(R.string.endpoint_hint)
} else {
endpointInput.setText(retrievedEndpoint)
}

// Set retrieved language code in input, or set hint
// TODO: This could be a dropdown list? Or radio group?
if (retrievedLanguageCode.isNullOrEmpty()) {
languageCodeInput.hint = getString(R.string.language_code_hint)
} else {
languageCodeInput.setText(retrievedLanguageCode)
}

// Set retrieved request style, or assign a default
if (retrievedRequestStyle == null) {
dataStore.edit { settings ->
settings[REQUEST_STYLE] = true
}
requestStyleOption.check(R.id.radio_btn_openai_api)
} else if (retrievedRequestStyle) {
requestStyleOption.check(R.id.radio_btn_openai_api)
} else {
requestStyleOption.check(R.id.radio_btn_whisper_webservice)
}

// Set retrieved api key
if (retrievedApiKey.isNullOrEmpty()) {
apiKeyInput.hint = getString(R.string.enter_openai_api_key)
apiKeyInput.hint = getString(R.string.api_key_hint)
} else {
apiKeyInput.setText(retrievedApiKey)
}

// Re-enable inputs, buttons & controls
endpointInput.isEnabled = true
btnSetEndpoint.isEnabled = true
languageCodeInput.isEnabled = true
btnSetLanguageCode.isEnabled = true
apiKeyInput.isEnabled = true
btnSetApiKey.isEnabled = true
requestStyleOption.isEnabled = true

// After retrieval is done, assign onClick event to the setApiKey button
btnSetApiKey.setOnClickListener { onSetApiKey(context, apiKeyInput.text.toString()) }
// After retrieval is done, assign onClick event to the set buttons
btnSetEndpoint.setOnClickListener { onSetConfig(context, ENDPOINT, endpointInput.text.toString()) }
btnSetLanguageCode.setOnClickListener { onSetConfig(context, LANGUAGE_CODE, languageCodeInput.text.toString()) }
requestStyleOption.setOnCheckedChangeListener { _, checkedId ->
j3soon marked this conversation as resolved.
Show resolved Hide resolved
onSetConfig(context, REQUEST_STYLE, (checkedId == R.id.radio_btn_openai_api))
}
btnSetApiKey.setOnClickListener { onSetConfig(context, API_KEY, apiKeyInput.text.toString()) }
}
}

// The onClick event of the button set api key
private fun onSetApiKey(context: Context, newApiKey: String?) {
// The onClick event of set config buttons
private fun <T>onSetConfig(context: Context, key: Preferences.Key<T>, newValue: T) {
j3soon marked this conversation as resolved.
Show resolved Hide resolved
CoroutineScope(Dispatchers.Main).launch {
withContext(Dispatchers.IO) {
dataStore.edit { settings ->
settings[API_KEY] = newApiKey ?: ""
settings[key] = newValue
}
}

Toast.makeText(context, getString(R.string.api_key_successfully_set), Toast.LENGTH_SHORT).show()
Toast.makeText(context, getString(R.string.successfully_set), Toast.LENGTH_SHORT).show()
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ import android.content.pm.PackageManager
import android.widget.Toast

private const val RECORDED_AUDIO_FILENAME = "recorded.m4a"

private const val AUDIO_MEDIA_TYPE = "audio/mp4"
class WhisperInputService : InputMethodService() {
private var whisperKeyboard: WhisperKeyboard = WhisperKeyboard()
private var whisperJobManager: WhisperTranscriber = WhisperTranscriber()
Expand All @@ -23,7 +23,7 @@ class WhisperInputService : InputMethodService() {

private fun transcriptionCallback(text: String?) {
if (!text.isNullOrEmpty()) {
currentInputConnection?.commitText(text, text.length)
currentInputConnection?.commitText(text, 1)
}

whisperKeyboard.reset()
Expand Down Expand Up @@ -67,6 +67,7 @@ class WhisperInputService : InputMethodService() {
whisperJobManager.startAsync(
this,
recordedAudioFilename,
AUDIO_MEDIA_TYPE,
{ transcriptionCallback(it) },
{ transcriptionExceptionCallback(it) }
)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,44 +1,63 @@
package com.example.whispertoinput

import android.content.Context
import android.util.Log
import com.aallam.openai.api.audio.TranscriptionRequest
import com.aallam.openai.api.file.FileSource
import com.aallam.openai.api.model.ModelId
import com.aallam.openai.client.OpenAI
import androidx.datastore.preferences.core.Preferences
import kotlinx.coroutines.*
import kotlinx.coroutines.flow.first
import kotlinx.coroutines.flow.map
import okio.FileSystem
import okio.Path.Companion.toPath
import okhttp3.Headers
import okhttp3.MediaType.Companion.toMediaTypeOrNull
import okhttp3.MultipartBody
import okhttp3.OkHttpClient
import okhttp3.Request
import okhttp3.RequestBody
import okhttp3.RequestBody.Companion.asRequestBody
import java.io.File

class WhisperTranscriber {
private data class Config(
val endpoint: String,
val languageCode: String,
val isRequestStyleOpenaiApi: Boolean,
val apiKey: String
)

private var currentTranscriptionJob: Job? = null

fun startAsync(
context: Context,
filename: String,
mediaType: String,
callback: (String?) -> Unit,
exceptionCallback: (String) -> Unit
) {
suspend fun makeWhisperRequest(): String {
val apiKey = context.dataStore.data.map { preferences ->
preferences[API_KEY]
// Retrieve configs
val (endpoint, languageCode, isRequestStyleOpenaiApi, apiKey) = context.dataStore.data.map { preferences: Preferences ->
Config(
preferences[ENDPOINT] ?: "",
preferences[LANGUAGE_CODE] ?: "en",
preferences[REQUEST_STYLE] ?: true,
preferences[API_KEY] ?: ""
)
}.first()
val openai = OpenAI(
token = apiKey ?: "",
)
val request = TranscriptionRequest(
audio = FileSource(
name = filename,
source = FileSystem.SYSTEM.source(filename.toPath())
),
model = ModelId("whisper-1"),
language = "zh"

// Make request
val client = OkHttpClient()
val request = buildWhisperRequest(
filename,
"$endpoint?encode=true&task=transcribe&language=$languageCode&word_timestamps=false&output=txt",
j3soon marked this conversation as resolved.
Show resolved Hide resolved
mediaType,
apiKey,
isRequestStyleOpenaiApi
)
val transcription = openai.transcription(request)
val response = client.newCall(request).execute()

return transcription.text
// If request is not successful, or response code is weird
if (!response.isSuccessful || response.code / 100 != 2) {
j3soon marked this conversation as resolved.
Show resolved Hide resolved
throw Exception(response.body!!.string().replace('\n', ' '))
}
return response.body!!.string()
}

// Create a cancellable job in the main thread (for UI updating)
Expand Down Expand Up @@ -79,4 +98,38 @@ class WhisperTranscriber {
currentTranscriptionJob?.cancel()
currentTranscriptionJob = job
}

/**
 * Builds the multipart POST request that uploads a recorded audio file for transcription.
 *
 * The form layout depends on the request style:
 * - OpenAI API style: the audio is sent in the `file` part together with `model` and
 *   `response_format` parts, and an `Authorization: Bearer` header carries [apiKey].
 * - Whisper webservice style: the audio is sent in the `audio_file` part only, and no
 *   authorization header is added.
 *
 * Exactly one file part is attached per request, so the recording is never uploaded twice.
 *
 * @param filename path of the audio file to upload.
 * @param url full request URL, including any query string.
 * @param mediaType MIME type of the audio file; it is parsed with `toMediaTypeOrNull`, so an
 *   unparsable value results in a null media type for the file part.
 * @param apiKey used as the bearer token, only when [isRequestStyleOpenaiApi] is true.
 * @param isRequestStyleOpenaiApi true for the OpenAI API style, false for the Whisper
 *   webservice style.
 * @return the assembled request.
 */
private fun buildWhisperRequest(
    filename: String,
    url: String,
    mediaType: String,
    apiKey: String,
    isRequestStyleOpenaiApi: Boolean
): Request {
    val fileBody: RequestBody = File(filename).asRequestBody(mediaType.toMediaTypeOrNull())

    val requestBody: RequestBody = MultipartBody.Builder().apply {
        setType(MultipartBody.FORM)

        if (isRequestStyleOpenaiApi) {
            // OpenAI API style: audio in "file", plus model and plain-text response format.
            addFormDataPart("file", "@audio.m4a", fileBody)
            addFormDataPart("model", "whisper-1")
            addFormDataPart("response_format", "text")
        } else {
            // Whisper webservice style: audio in "audio_file".
            addFormDataPart("audio_file", "@audio.m4a", fileBody)
        }
    }.build()

    // No explicit Content-Type header: the multipart body supplies its own content type
    // (including the boundary), and a hand-written "multipart/form-data" would lack it.
    val requestHeaders: Headers = Headers.Builder().apply {
        if (isRequestStyleOpenaiApi) {
            add("Authorization", "Bearer $apiKey")
        }
    }.build()

    return Request.Builder()
        .headers(requestHeaders)
        .url(url)
        .post(requestBody)
        .build()
}
}
Loading