Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added speech synthesis bypass engine #2445

Merged
merged 6 commits into from
Oct 5, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
- Fix [#2360](https://github.com/microsoft/BotFramework-WebChat/issues/2360). Timestamp should update on language change, by [@compulim](https://github.com/compulim) in PR [#2414](https://github.com/microsoft/BotFramework-WebChat/pull/2414)
- Fix [#2428](https://github.com/microsoft/BotFramework-WebChat/issues/2428). Should interrupt speech synthesis after microphone button is clicked, by [@compulim](https://github.com/compulim) in PR [#2429](https://github.com/microsoft/BotFramework-WebChat/pull/2429)
- Fix [#2422](https://github.com/microsoft/BotFramework-WebChat/issues/2422). Store thumbnail URL using the activity's `attachment.thumbnailUrl` field, by [@compulim](https://github.com/compulim) in PR [#2433](https://github.com/microsoft/BotFramework-WebChat/pull/2433)
- Fix [#2435](https://github.com/microsoft/BotFramework-WebChat/issues/2435). Fix microphone button getting stuck on voice-triggered expecting input hint without a speech synthesis engine, by [@compulim](https://github.com/compulim) in PR [#2445](https://github.com/microsoft/BotFramework-WebChat/pull/2445)

### Added

Expand Down
24 changes: 24 additions & 0 deletions __tests__/speech.synthesis.js
Original file line number Diff line number Diff line change
Expand Up @@ -124,4 +124,28 @@ describe('speech synthesis', () => {
await expect(speechRecognitionStartCalled().fn(driver)).resolves.toBeTruthy();
await driver.wait(negateCondition(speechSynthesisUtterancePended()), timeouts.ui);
});

// Regression suite for #2435: when the host supplies a ponyfill with speech
// recognition but NO speech synthesis, an "expecting input" hint must still
// re-open the microphone instead of leaving it stuck waiting for synthesis.
describe('without speech synthesis', () => {
test('should start recognition immediately after receiving expected input hint', async () => {
const { driver, pageObjects } = await setupWebDriver({
props: {
webSpeechPonyfillFactory: () => {
// Deliberately omit speechSynthesis/SpeechSynthesisUtterance so the
// bypass engine added in this PR is exercised.
const { SpeechGrammarList, SpeechRecognition } = window.WebSpeechMock;

return {
SpeechGrammarList,
SpeechRecognition
};
}
}
});

await pageObjects.sendMessageViaMicrophone('input hint expected');

// Wait for both the outgoing message and the bot reply to render.
await driver.wait(minNumActivitiesShown(2), timeouts.directLine);

// Recognition should restart (mic reopened) and no synthesis should be queued.
await expect(speechRecognitionStartCalled().fn(driver)).resolves.toBeTruthy();
await driver.wait(negateCondition(speechSynthesisUtterancePended()), timeouts.ui);
});
});
});
17 changes: 12 additions & 5 deletions packages/component/src/BasicTranscript.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,11 @@ import connectToWebChat from './connectToWebChat';
import ScrollToEndButton from './Activity/ScrollToEndButton';
import SpeakActivity from './Activity/Speak';

import {
speechSynthesis as bypassSpeechSynthesis,
SpeechSynthesisUtterance as BypassSpeechSynthesisUtterance
} from './Speech/BypassSpeechSynthesisPonyfill';

const ROOT_CSS = css({
overflow: 'hidden',
position: 'relative'
Expand Down Expand Up @@ -85,7 +90,11 @@ const BasicTranscript = ({
<div className={classNames(ROOT_CSS + '', className + '')} role="log">
<ScrollToBottomPanel className={PANEL_CSS + ''}>
<div className={FILLER_CSS} />
<SayComposer speechSynthesis={speechSynthesis} speechSynthesisUtterance={SpeechSynthesisUtterance}>
<SayComposer
// These are props for passing in Web Speech ponyfill, where speech synthesis requires these two class/object to be ponyfilled.
speechSynthesis={speechSynthesis || bypassSpeechSynthesis}
compulim marked this conversation as resolved.
Show resolved Hide resolved
speechSynthesisUtterance={SpeechSynthesisUtterance || BypassSpeechSynthesisUtterance}
>
<ul
aria-atomic="false"
aria-live="polite"
Expand All @@ -95,7 +104,7 @@ const BasicTranscript = ({
>
{activityElements.map(({ activity, element }, index) => (
<li
/* Because of differences in browser implementations, aria-label=" " is used to make the screen reader not repeat the same text multiple times in Chrome v75 */
// Because of differences in browser implementations, aria-label=" " is used to make the screen reader not repeat the same text multiple times in Chrome v75
aria-label=" "
className={classNames(styleSet.activity + '', {
// Hide timestamp if same timestamp group with the next activity
Expand All @@ -110,9 +119,7 @@ const BasicTranscript = ({
>
{element}
{// TODO: [P2] We should use core/definitions/speakingActivity for this predicate instead
speechSynthesis && activity.channelData && activity.channelData.speak && (
<SpeakActivity activity={activity} />
)}
activity.channelData && activity.channelData.speak && <SpeakActivity activity={activity} />}
</li>
))}
</ul>
Expand Down
164 changes: 164 additions & 0 deletions packages/component/src/Speech/BypassSpeechSynthesisPonyfill.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
// Since this is a bypass, we will relax some ESLint rules.
// All classes/properties defined here are in W3C Web Speech API.

/* eslint class-methods-use-this: "off" */
/* eslint getter-return: "off" */
/* eslint max-classes-per-file: ["error", 4] */
/* eslint no-empty-function: "off" */

compulim marked this conversation as resolved.
Show resolved Hide resolved
import EventTarget, { defineEventAttribute } from '../external/event-target-shim';

// Minimal stand-in for the W3C SpeechSynthesisEvent interface.
// Only `type` and `utterance` carry real data; the bypass engine never
// reports playback progress, so position/timing fields are stubbed.
class SpeechSynthesisEvent {
  constructor(type, utterance) {
    // Captured once and surfaced through the read-only getters below.
    this._eventType = type;
    this._relatedUtterance = utterance;
  }

  // No character-level progress is tracked in the bypass engine.
  get charIndex() {
    return 0;
  }

  // Synthesis is instantaneous here, so elapsed time is always zero.
  get elapsedTime() {
    return 0;
  }

  // SSML mark name — the bypass engine produces none, so this is undefined.
  get name() {}

  get type() {
    return this._eventType;
  }

  get utterance() {
    return this._relatedUtterance;
  }
}

// Bypass implementation of the W3C SpeechSynthesisUtterance interface.
// Plain data holder: every accessor pair simply mirrors a backing field.
// Extends EventTarget so the engine can dispatch start/end/error events on it.
class SpeechSynthesisUtterance extends EventTarget {
  constructor(text) {
    super();

    // Spec-shaped defaults: US English, neutral prosody, full volume,
    // and no voice selected until the caller assigns one.
    Object.assign(this, {
      _lang: 'en-US',
      _pitch: 1,
      _rate: 1,
      _text: text,
      _voice: null,
      _volume: 1
    });
  }

  get lang() {
    return this._lang;
  }
  set lang(v) {
    this._lang = v;
  }

  get pitch() {
    return this._pitch;
  }
  set pitch(v) {
    this._pitch = v;
  }

  get rate() {
    return this._rate;
  }
  set rate(v) {
    this._rate = v;
  }

  get text() {
    return this._text;
  }
  set text(v) {
    this._text = v;
  }

  get voice() {
    return this._voice;
  }
  set voice(v) {
    this._voice = v;
  }

  get volume() {
    return this._volume;
  }
  set volume(v) {
    this._volume = v;
  }
}

// Expose the on<event> handler properties (e.g. utterance.onend, utterance.onerror)
// required by the W3C SpeechSynthesisUtterance interface, backed by the
// event-target-shim's defineEventAttribute helper.
defineEventAttribute(SpeechSynthesisUtterance.prototype, 'boundary');
defineEventAttribute(SpeechSynthesisUtterance.prototype, 'end');
defineEventAttribute(SpeechSynthesisUtterance.prototype, 'error');
defineEventAttribute(SpeechSynthesisUtterance.prototype, 'mark');
defineEventAttribute(SpeechSynthesisUtterance.prototype, 'pause');
defineEventAttribute(SpeechSynthesisUtterance.prototype, 'resume');
defineEventAttribute(SpeechSynthesisUtterance.prototype, 'start');

// Single fixed voice exposed by the bypass engine. All properties are
// constants shaped after the W3C SpeechSynthesisVoice interface; the
// engine produces no audio, so the voice only needs to look plausible.
class SpeechSynthesisVoice {
  // This is the engine's one and only voice, hence always the default.
  get default() {
    return true;
  }

  get lang() {
    return 'en-US';
  }

  // "Local" because no network service is involved in the bypass.
  get localService() {
    return true;
  }

  get name() {
    return 'English (US)';
  }

  // Mirrors `name`; no real URI exists for a synthetic voice.
  get voiceURI() {
    return 'English (US)';
  }
}

// Bypass implementation of the W3C SpeechSynthesis interface.
// `speak` completes synchronously — it fires 'start' then 'end' on the
// utterance without producing audio — so callers waiting on the 'end'
// event (e.g. the microphone re-open logic) proceed immediately.
class SpeechSynthesis extends EventTarget {
  // Nothing ever plays, so the queue state is permanently idle.
  get paused() {
    return false;
  }

  get pending() {
    return false;
  }

  get speaking() {
    return false;
  }

  // No queue exists, so cancelling is a no-op.
  cancel() {}

  getVoices() {
    // A fresh array each call, holding the single synthetic voice.
    return [new SpeechSynthesisVoice()];
  }

  pause() {
    throw new Error('pause is not implemented.');
  }

  resume() {
    throw new Error('resume is not implemented.');
  }

  speak(utterance) {
    // Simulate instantaneous playback: 'start' followed immediately by 'end'.
    for (const eventType of ['start', 'end']) {
      utterance.dispatchEvent(new SpeechSynthesisEvent(eventType, utterance));
    }
  }
}

// Expose the onvoiceschanged handler property per the W3C SpeechSynthesis interface.
defineEventAttribute(SpeechSynthesis.prototype, 'voiceschanged');

// Singleton instance, mirroring how the browser exposes window.speechSynthesis.
const speechSynthesis = new SpeechSynthesis();

export { speechSynthesis, SpeechSynthesisEvent, SpeechSynthesisUtterance, SpeechSynthesisVoice };
Loading