diff --git a/public/lunchroom_manners/lunchroom_manners.vtt b/public/lunchroom_manners/lunchroom_manners.vtt
index efadbb85..9ea6110e 100644
--- a/public/lunchroom_manners/lunchroom_manners.vtt
+++ b/public/lunchroom_manners/lunchroom_manners.vtt
@@ -1,16 +1,37 @@
WEBVTT
+region
+id:bill
+width:40%
+lines:3
+regionanchor:100%,100%
+viewportanchor:90%,90%
+scroll:up
+
+STYLE
+::cue {
+ background-image: linear-gradient(to bottom, dimgray, lightgray);
+ color: papayawhip;
+}
+/* Style blocks cannot use blank lines nor "dash dash greater than" */
+
+NOTE
+This file was machine-generated.
+The cues and timing may not be 100% accurate.
+
1
-00:00:01.200 --> 00:00:21.000
+00:00:01.200 --> 00:00:21.000 region:fred align:left
[music]
+NOTE End of music, and starting dialog
+
2
-00:00:22.200 --> 00:00:26.600
-Just before lunch one day, a puppet show
+00:00:22.200 --> 00:00:26.600 region:bill align:right
+Just before lunch one day, a puppet show
was put on at school.
3
-00:00:26.700 --> 00:00:31.500
+00:00:26.700 --> 00:00:31.500 region:fred align:left
It was called "Mister Bungle Goes to Lunch".
4
diff --git a/src/components/Transcript/Transcript.js b/src/components/Transcript/Transcript.js
index 1ca4ef5e..dd1c0721 100644
--- a/src/components/Transcript/Transcript.js
+++ b/src/components/Transcript/Transcript.js
@@ -8,11 +8,13 @@ import {
parseTranscriptData,
sanitizeTranscripts,
TRANSCRIPT_TYPES,
+ TRANSCRIPT_CUE_TYPES,
} from '@Services/transcript-parser';
import './Transcript.scss';
const NO_TRANSCRIPTS_MSG = 'No valid Transcript(s) found, please check again.';
const INVALID_URL_MSG = 'Invalid URL for transcript, please check again.';
+const INVALID_VTT = 'Invalid WebVTT file, please check again.';
const NO_SUPPORT = 'Transcript format is not supported, please check again.';
/**
@@ -231,6 +233,8 @@ const Transcript = ({ playerID, manifestUrl, transcripts = [] }) => {
newError = NO_TRANSCRIPTS_MSG;
} else if (tType === TRANSCRIPT_TYPES.noSupport) {
newError = NO_SUPPORT;
+ } else if (tType === TRANSCRIPT_TYPES.invalidTimedText) {
+ newError = INVALID_VTT;
}
setTranscript(tData);
setTranscriptInfo({ title, filename, id, isMachineGen, tType, tUrl, tFileExt, tError: newError });
@@ -374,37 +378,45 @@ const Transcript = ({ playerID, manifestUrl, transcripts = [] }) => {
case TRANSCRIPT_TYPES.timedText:
if (transcript.length > 0) {
transcript.map((t, index) => {
- let line = (
- (textRefs.current[index] = el)}
- onClick={handleTranscriptChange}
- onKeyDown={handleOnKeyPress}
- starttime={t.begin} // set custom attribute: starttime
- endtime={t.end} // set custom attribute: endtime
- href={'#'}
- role="listitem"
- >
- {t.begin && (
+ let line;
+ if (t.tag === TRANSCRIPT_CUE_TYPES.note) {
+ line = ;
+ } else if (t.tag === TRANSCRIPT_CUE_TYPES.timedCue) {
+ line = (
+ (textRefs.current[index] = el)}
+ onClick={handleTranscriptChange}
+ onKeyDown={handleOnKeyPress}
+ starttime={t.begin} // set custom attribute: starttime
+ endtime={t.end} // set custom attribute: endtime
+ href={'#'}
+ role="listitem"
+ >
+ {t.begin && (
+
+ [{timeToHHmmss(t.begin, true)}]
+
+ )}
- [{timeToHHmmss(t.begin, true)}]
-
- )}
-
-
-
- );
+ className="ramp--transcript_text"
+ data-testid="transcript_text"
+ key={`ttext_${index}`}
+ dangerouslySetInnerHTML={{ __html: buildSpeakerText(t) }}
+ />
+ );
+ }
timedText.push(line);
});
}
diff --git a/src/components/Transcript/Transcript.scss b/src/components/Transcript/Transcript.scss
index 0b254c8f..90868d33 100644
--- a/src/components/Transcript/Transcript.scss
+++ b/src/components/Transcript/Transcript.scss
@@ -105,6 +105,7 @@ a.ramp--transcript_item {
.ramp--transcript_machine_generated {
flex-basis: 100%;
margin: 0;
+ line-height: 1.5em;
}
.ramp--transcript_auto_scroll_check {
@@ -256,4 +257,4 @@ a.ramp--transcript_item {
100% {
opacity: 0;
}
-}
\ No newline at end of file
+}
diff --git a/src/components/Transcript/Transcript.test.js b/src/components/Transcript/Transcript.test.js
index 9940399f..c0bff6e1 100644
--- a/src/components/Transcript/Transcript.test.js
+++ b/src/components/Transcript/Transcript.test.js
@@ -39,16 +39,19 @@ describe('Transcript component', () => {
begin: 1.2,
end: 21,
text: '[music]',
+ tag: 'TIMED_CUE'
},
{
begin: 22.2,
end: 26.6,
text: 'transcript text 1',
+ tag: 'TIMED_CUE'
},
{
begin: 27.3,
end: 31,
text: 'transcript text 2',
+ tag: 'TIMED_CUE'
},
],
tUrl: 'http://example.com/transcript.json',
@@ -102,7 +105,82 @@ describe('Transcript component', () => {
});
});
- describe('non timed-text', () => {
+ describe('with WebVTT including a header block', () => {
+ let parseTranscriptMock;
+ beforeEach(async () => {
+ const parsedData = {
+ tData: [
+ {
+ begin: 0,
+ end: 0,
+ text: 'NOTE
This is a multi-line comment.
Following is a list of cues.',
+ tag: 'NOTE'
+ },
+ {
+ begin: 1.2,
+ end: 21,
+ text: '[music]',
+ tag: 'TIMED_CUE'
+ },
+ {
+ begin: 22.2,
+ end: 26.6,
+ text: 'transcript text 1',
+ tag: 'TIMED_CUE'
+ },
+ {
+ begin: 27.3,
+ end: 31,
+ text: 'transcript text 2',
+ tag: 'TIMED_CUE'
+ },
+ ],
+ tUrl: 'http://example.com/transcript.vtt',
+ tType: transcriptParser.TRANSCRIPT_TYPES.timedText,
+ tFileExt: 'vtt',
+ };
+ parseTranscriptMock = jest
+ .spyOn(transcriptParser, 'parseTranscriptData')
+ .mockReturnValue(parsedData);
+
+ render(
+
+
+
+
+ );
+ await act(() => Promise.resolve());
+ });
+ test('renders successfully', async () => {
+ await waitFor(() => {
+ expect(parseTranscriptMock).toHaveBeenCalledTimes(1);
+ expect(screen.queryByTestId('transcript_content_1')).toBeInTheDocument();
+ expect(screen.queryAllByTestId('transcript_time')).toHaveLength(3);
+ // One more than timestamps for displaying the comment
+ expect(screen.queryAllByTestId('transcript_text')).toHaveLength(4);
+ });
+ });
+
+ test('renders comment in the header block', async () => {
+ await waitFor(() => {
+ expect(screen.queryAllByTestId('transcript_text')[0]).toHaveTextContent(
+ 'NOTEThis is a multi-line comment.Following is a list of cues.'
+ );
+ });
+ });
+
+ test('renders the rest of the cue with timestamp', async () => {
+ await waitFor(() => {
+ const transcriptItem = screen.queryAllByTestId('transcript_item')[1];
+ expect(transcriptItem).toHaveAttribute('starttime');
+ expect(transcriptItem).toHaveAttribute('endtime');
+ fireEvent.click(transcriptItem);
+ expect(transcriptItem.classList.contains('active')).toBeTruthy();
+ });
+ });
+ });
+
+ describe('with transcript as an annotation list', () => {
let parseTranscriptMock;
beforeEach(async () => {
const parsedData = {
@@ -111,16 +189,19 @@ describe('Transcript component', () => {
begin: null,
end: null,
text: '[music]',
+ tag: 'TIMED_CUE'
},
{
begin: null,
end: null,
text: 'transcript text 1',
+ tag: 'TIMED_CUE'
},
{
begin: null,
end: null,
text: 'transcript text 2',
+ tag: 'TIMED_CUE'
},
],
tUrl: 'http://example.com/transcript.json',
@@ -266,8 +347,8 @@ describe('Transcript component', () => {
});
});
- describe('renders a message with invalid transcript data', () => {
- test('empty list of transcripts', () => {
+ describe('renders a message for', () => {
+ test('an empty list of transcripts', () => {
render(
@@ -281,7 +362,7 @@ describe('Transcript component', () => {
);
});
- test('empty transcript item list', async () => {
+ test('an empty transcript item list', async () => {
const props = {
playerID: 'player-id',
transcripts: [
@@ -514,6 +595,49 @@ describe('Transcript component', () => {
);
});
});
+
+ test('invalid WebVTT file', async () => {
+ const props = {
+ playerID: 'player-id',
+ transcripts: [
+ {
+ canvasId: 0,
+ items: [
+ {
+ title: 'WebVTT Transcript',
+ url: 'https://example.com/lunchroom_manners.vtt',
+ },
+ ],
+ },
+ ],
+ };
+
+ const parseTranscriptMock = jest
+ .spyOn(transcriptParser, 'parseTranscriptData')
+ .mockReturnValue({
+ tData: [],
+ tUrl: 'https://example.com/lunchroom_manners.vtt',
+ tType: transcriptParser.TRANSCRIPT_TYPES.invalidTimedText,
+ });
+
+
+ render(
+
+
+
+
+ );
+ await act(() => Promise.resolve());
+
+ await waitFor(() => {
+ expect(parseTranscriptMock).toHaveBeenCalledTimes(1);
+ expect(screen.queryByTestId('transcript_content_-3')).toBeInTheDocument();
+ expect(screen.queryByTestId('no-transcript')).toBeInTheDocument();
+ expect(screen.getByTestId('no-transcript')).toHaveTextContent(
+ 'Invalid WebVTT file, please check again.'
+ );
+ });
+ });
});
describe('with props', () => {
@@ -558,7 +682,10 @@ describe('Transcript component', () => {
const parseTranscriptMock = jest
.spyOn(transcriptParser, 'parseTranscriptData')
.mockReturnValue({
- tData: [{ begin: 1.2, end: 21, text: '[music]' }, { begin: 22.2, end: 26.6, text: 'transcript text 1' }],
+ tData: [
+ { begin: 1.2, end: 21, text: '[music]', tag: 'TIMED_CUE' },
+ { begin: 22.2, end: 26.6, text: 'transcript text 1', tag: 'TIMED_CUE' }
+ ],
tUrl: 'http://example.com/webvtt-transcript.vtt',
tType: transcriptParser.TRANSCRIPT_TYPES.timedText,
tFileExt: 'vtt',
diff --git a/src/services/transcript-parser.js b/src/services/transcript-parser.js
index 61f7cdb2..adb15f2f 100644
--- a/src/services/transcript-parser.js
+++ b/src/services/transcript-parser.js
@@ -30,7 +30,21 @@ const TRANSCRIPT_MIME_EXTENSIONS = [
];
// ENum for describing transcript types include invalid and no transcript info
-export const TRANSCRIPT_TYPES = { noSupport: -2, invalid: -1, noTranscript: 0, timedText: 1, plainText: 2, docx: 3 };
+export const TRANSCRIPT_TYPES = {
+ invalidTimedText: -3,
+ noSupport: -2,
+ invalid: -1,
+ noTranscript: 0,
+ timedText: 1,
+ plainText: 2,
+ docx: 3
+};
+
+// Enum for the types of text lines in a time-synced transcript
+export const TRANSCRIPT_CUE_TYPES = {
+ note: 'NOTE',
+ timedCue: 'TIMED_CUE',
+};
/**
* Parse the transcript information in the Manifest presented as supplementing annotations
@@ -304,8 +318,8 @@ export async function parseTranscriptData(url, canvasIndex, format) {
if (textLines.length == 0) {
return { tData: [], tUrl: url, tType: TRANSCRIPT_TYPES.noTranscript };
} else {
- tData = parseTimedText(textData, fileType === 'srt');
- return { tData: tData, tUrl: url, tType: TRANSCRIPT_TYPES.timedText, tFileExt: fileType };
+ let { tData, tType } = parseTimedText(textData, fileType === 'srt');
+ return { tData: tData, tUrl: url, tType: tType, tFileExt: fileType };
}
// for .docx files
case 'docx':
@@ -441,8 +455,9 @@ async function parseExternalAnnotations(annotation) {
.then((response) => response.text())
.then((data) => {
if (TRANSCRIPT_MIME_TYPES.webvtt.includes(tFormat) || TRANSCRIPT_MIME_TYPES.srt.includes(tFormat)) {
- tData = parseTimedText(data, TRANSCRIPT_MIME_TYPES.srt.includes(tFormat));
- type = TRANSCRIPT_TYPES.timedText;
+ let parsed = parseTimedText(data, TRANSCRIPT_MIME_TYPES.srt.includes(tFormat));
+ tData = parsed.tData;
+ type = parsed.tType;
tFileExt = TRANSCRIPT_MIME_EXTENSIONS.filter(tm => tm.type.includes(tFormat))[0].ext;
} else {
tData = data.replace(/\n/g, "
");
@@ -503,6 +518,7 @@ function createTData(annotations) {
format: tBody.getFormat(),
begin: parseFloat(start),
end: parseFloat(end),
+ tag: TRANSCRIPT_CUE_TYPES.timedCue
});
}
});
@@ -519,41 +535,122 @@ function createTData(annotations) {
* begin: '00:00:00.000',
* end: '00:01:00.000',
* text: 'Transcript text sample'
+ * tag: NOTE || TIMED_CUE
* }
*/
export function parseTimedText(fileData, isSRT = false) {
let tData = [];
+ // NOTE blocks gathered from the WebVTT header; remains empty for SRT input
+ let noteLines = [];
- const lines = cleanTimedText(fileData);
+ // split file content into lines
+ const lines = fileData.split('\n');
+
+ // For SRT files all of the file content is considered as cues
+ let cueLines = lines;
if (!isSRT) {
- const firstLine = lines.shift();
- const valid = validateWebVTT(firstLine);
+ // validateWebVTT() checks the header and hands back the cue lines and header notes
+ const { valid, cue_lines, notes } = validateWebVTT(lines);
if (!valid) {
console.error('Invalid WebVTT file');
- return [];
+ // Report the failure through tType instead of returning a bare empty list
+ return { tData: [], tType: TRANSCRIPT_TYPES.invalidTimedText };
}
+ cueLines = cue_lines;
+ noteLines = notes;
}
- const groups = groupTimedTextLines(lines);
+
+ const timedText = cleanTimedText(cueLines);
+ const groups = groupTimedTextLines(timedText);
+ // Add back the NOTE(s) in the header block
+ groups.unshift(...noteLines);
+ // Keep only the entries parseTimedTextLine() turns into a truthy result
groups.map((t) => {
let line = parseTimedTextLine(t, isSRT);
if (line) {
tData.push(line);
}
});
- return tData;
+
+ return { tData, tType: TRANSCRIPT_TYPES.timedText };
}
/**
- * Validate WebVTT file with its header
- * @param {String} line header line of the WebVTT file
- * @returns {Boolean}
+ * Validate WebVTT file with its header content.
+ * The first line must be exactly 'WEBVTT' once surrounding whitespace is
+ * trimmed; the remaining lines are handed to validateWebVTTHeaders().
+ * The first element is removed from the given array as part of the check.
+ * @param {Array} lines WebVTT file content split into lines
+ * @returns {Object} { valid: Boolean, cue_lines: Array, notes: Array }
*/
-function validateWebVTT(line) {
- if (line.includes('WEBVTT')) {
- return true;
+function validateWebVTT(lines) {
+  // shift() yields undefined for an empty array, so guard before trimming
+  const firstLine = lines.shift()?.trim();
+  if (firstLine !== 'WEBVTT') {
+    return { valid: false, cue_lines: [], notes: [] };
+  }
+  // Default the lists so callers always receive arrays, even for an invalid header
+  const { valid, cue_lines = [], notes = [] } = validateWebVTTHeaders(lines);
+  return { valid, cue_lines, notes };
+}
+
+/**
+ * Validate the text between 'WEBVTT' at the start and start of
+ * VTT cues. It looks for REGION and STYLE blocks and skips over these
+ * blocks. This doesn't validate the content within these blocks.
+ * NOTE blocks found before the first cue are collected so they can be
+ * displayed as part of the transcript.
+ * The WebVTT file is marked invalid when the header holds text that is not
+ * part of a REGION, STYLE or NOTE block, or when no cue line ('-->') is
+ * found after the header.
+ * @param {Array} lines WebVTT file content split into lines
+ * @returns {Object} { valid: Boolean, cue_lines: Array, notes: Array }
+ */
+function validateWebVTTHeaders(lines) {
+  let endOfHeadersIndex = 0;
+  let firstCueIndex = 0;
+  let hasTextBeforeCues = false;
+  const notesInHeader = [];
+
+  // A block ends at a blank line; when a CRLF file is split on '\n' a blank
+  // line is left holding a lone '\r'
+  const isBlankLine = (l) => l === '' || l === '\r';
+
+  // Remove line numbers for vtt cues
+  lines = lines.filter((l) => !Number(l));
+
+  for (let i = 0; i < lines.length; i++) {
+    const line = lines[i];
+    // Drop trailing whitespace (e.g. the '\r' of a CRLF file) before matching keywords
+    const keyword = line.trimEnd().toUpperCase();
+    // Skip REGION and STYLE blocks as these are related to displaying cues as overlays
+    if (keyword === 'REGION' || keyword === 'STYLE') {
+      // Increment until an empty line is encountered within the header block
+      i++;
+      while (i < lines.length && !isBlankLine(lines[i])) {
+        i++;
+      }
+      endOfHeadersIndex = i;
+    }
+    // Gather comments presented as NOTE(s) in the header block to be displayed as transcript
+    else if (keyword === 'NOTE') {
+      let noteText = line.trimEnd();
+      i++;
+      // Increment until an empty line is encountered within the NOTE block
+      while (i < lines.length && !isBlankLine(lines[i])) {
+        noteText = `${noteText}
${lines[i].trim()}`;
+        i++;
+      }
+      notesInHeader.push({ times: '', line: noteText, tag: TRANSCRIPT_CUE_TYPES.note });
+    }
+    // Terminate validation once the first cue is reached
+    else if (line.includes('-->')) {
+      firstCueIndex = i;
+      break;
+    }
+    // Flag to check for invalid text before cue lines
+    else if (line.trim().length !== 0) {
+      hasTextBeforeCues = true;
+    }
+  }
+
+  // Return the cues and comments in the header block when the given WebVTT is valid
+  if (firstCueIndex > endOfHeadersIndex && !hasTextBeforeCues) {
+    return {
+      valid: true,
+      cue_lines: lines.slice(firstCueIndex),
+      notes: notesInHeader
+    };
} else {
- return false;
+    // Same shape as the valid result so callers can destructure safely
+    return { valid: false, cue_lines: [], notes: [] };
}
}
@@ -563,27 +660,28 @@ function validateWebVTT(line) {
- * @param {String} data WebVTT data as a blob of text
+ * @param {Array} lines timed-text file content split into lines
* @returns {Array}
*/
-function cleanTimedText(data) {
- // split into lines
- let lines = data.split('\n');
- // remove empty lines
- let text_lines = lines.filter((l) => l.length > 0);
- // remove line numbers
- text_lines = text_lines.filter((l) => (Number(l) ? false : true));
- // strip white spaces and lines with index
- let stripped = text_lines.filter((l) => !/^[0-9]*[\r]/gm.test(l));
+function cleanTimedText(lines) {
+  // Remove empty lines
+  const cue_lines = lines.filter((l) => l.length > 0);
+
+  // Remove cue index lines. WebVTT input has these removed during header
+  // validation, but SRT input reaches this function unfiltered, so without
+  // this step an LF-terminated index line would be kept as transcript text
+  const text_lines = cue_lines.filter((l) => !Number(l));
+
+  // Strip white spaces and lines with index (digits followed by '\r', or a lone '\r')
+  const stripped = text_lines.filter((l) => !/^[0-9]*[\r]/.test(l));
return stripped;
}
/**
* Group multi line transcript text values alongside the relevant
* timestamp values. E.g. converts,
- * ["00:00:00.000 --> 00:01:00.000", "Transcript text", " from multiple lines",
- * "00:03:00.000 --> 00:04:00.000", "Next transcript text"]
+ * [
+ * "00:00:00.000 --> 00:01:00.000", "Transcript", " from multiple lines",
+ * "00:03:00.000 --> 00:04:00.000", "Next transcript text",
+ * "NOTE This is a comment"
+ * ]
* into
* [
- * { times: "00:00:00.000 --> 00:01:00.000", line: "Transcript text from multiple lines" },
- * { times: "00:03:00.000 --> 00:04:00.000", line: "Next transcript text" },
+ * { times: "00:00:00.000 --> 00:01:00.000", line: "Transcript from multiple lines", tag: "TIMED_CUE" },
+ * { times: "00:03:00.000 --> 00:04:00.000", line: "Next transcript text", tag: "TIMED_CUE" },
+ * { times: "", line: "NOTE This is a comment", tag: "NOTE" }
* ]
* @param {Array} lines array of lines in the WebVTT file
* @returns {Array