Source: lib/text/text_engine.js

  1. /*! @license
  2. * Shaka Player
  3. * Copyright 2016 Google LLC
  4. * SPDX-License-Identifier: Apache-2.0
  5. */
  6. goog.provide('shaka.text.TextEngine');
  7. goog.require('goog.asserts');
  8. goog.require('shaka.Deprecate');
  9. goog.require('shaka.media.ClosedCaptionParser');
  10. goog.require('shaka.text.Cue');
  11. goog.require('shaka.util.BufferUtils');
  12. goog.require('shaka.util.IDestroyable');
  13. goog.require('shaka.util.MimeUtils');
  14. // TODO: revisit this when Closure Compiler supports partially-exported classes.
  15. /**
  16. * @summary Manages text parsers and cues.
  17. * @implements {shaka.util.IDestroyable}
  18. * @export
  19. */
  20. shaka.text.TextEngine = class {
  21. /** @param {shaka.extern.TextDisplayer} displayer */
  22. constructor(displayer) {
  23. /** @private {?shaka.extern.TextParser} */
  24. this.parser_ = null;
  25. /** @private {shaka.extern.TextDisplayer} */
  26. this.displayer_ = displayer;
  27. /** @private {boolean} */
  28. this.segmentRelativeVttTiming_ = false;
  29. /** @private {number} */
  30. this.timestampOffset_ = 0;
  31. /** @private {number} */
  32. this.appendWindowStart_ = 0;
  33. /** @private {number} */
  34. this.appendWindowEnd_ = Infinity;
  35. /** @private {?number} */
  36. this.bufferStart_ = null;
  37. /** @private {?number} */
  38. this.bufferEnd_ = null;
  39. /** @private {string} */
  40. this.selectedClosedCaptionId_ = '';
  41. /** @private {shaka.extern.TextParser.ModifyCueCallback} */
  42. this.modifyCueCallback_ = (cue, uri) => {};
  43. /**
  44. * The closed captions map stores the CEA closed captions by closed captions
  45. * id and start and end time.
  46. * It's used as the buffer of closed caption text streams, to show captions
  47. * when we start displaying captions or switch caption tracks, we need to be
  48. * able to get the cues for the other language and display them without
  49. * re-fetching the video segments they were embedded in.
  50. * Structure of closed caption map:
  51. * closed caption id -> {start and end time -> cues}
  52. * @private {!Map<string, !Map<string, !Array<shaka.text.Cue>>>}
  53. */
  54. this.closedCaptionsMap_ = new Map();
  55. }
  56. /**
  57. * @param {string} mimeType
  58. * @param {!shaka.extern.TextParserPlugin} plugin
  59. * @export
  60. */
  61. static registerParser(mimeType, plugin) {
  62. shaka.text.TextEngine.parserMap_.set(mimeType, plugin);
  63. }
  64. /**
  65. * @param {string} mimeType
  66. * @export
  67. */
  68. static unregisterParser(mimeType) {
  69. shaka.text.TextEngine.parserMap_.delete(mimeType);
  70. }
  71. /**
  72. * @return {?shaka.extern.TextParserPlugin}
  73. * @export
  74. */
  75. static findParser(mimeType) {
  76. return shaka.text.TextEngine.parserMap_.get(mimeType);
  77. }
  78. /**
  79. * @param {string} mimeType
  80. * @return {boolean}
  81. */
  82. static isTypeSupported(mimeType) {
  83. if (shaka.text.TextEngine.parserMap_.has(mimeType)) {
  84. // An actual parser is available.
  85. return true;
  86. }
  87. if (mimeType == shaka.util.MimeUtils.CEA608_CLOSED_CAPTION_MIMETYPE ||
  88. mimeType == shaka.util.MimeUtils.CEA708_CLOSED_CAPTION_MIMETYPE ) {
  89. return !!shaka.media.ClosedCaptionParser.findDecoder();
  90. }
  91. return false;
  92. }
  93. // TODO: revisit this when the compiler supports partially-exported classes.
  94. /**
  95. * @override
  96. * @export
  97. */
  98. destroy() {
  99. this.parser_ = null;
  100. this.displayer_ = null;
  101. this.closedCaptionsMap_.clear();
  102. return Promise.resolve();
  103. }
  104. /**
  105. * @param {!shaka.extern.TextDisplayer} displayer
  106. */
  107. setDisplayer(displayer) {
  108. this.displayer_ = displayer;
  109. }
  110. /**
  111. * Initialize the parser. This can be called multiple times, but must be
  112. * called at least once before appendBuffer.
  113. *
  114. * @param {string} mimeType
  115. * @param {boolean} sequenceMode
  116. * @param {boolean} segmentRelativeVttTiming
  117. * @param {string} manifestType
  118. */
  119. initParser(mimeType, sequenceMode, segmentRelativeVttTiming, manifestType) {
  120. // No parser for CEA, which is extracted from video and side-loaded
  121. // into TextEngine and TextDisplayer.
  122. if (mimeType == shaka.util.MimeUtils.CEA608_CLOSED_CAPTION_MIMETYPE ||
  123. mimeType == shaka.util.MimeUtils.CEA708_CLOSED_CAPTION_MIMETYPE) {
  124. this.parser_ = null;
  125. return;
  126. }
  127. const factory = shaka.text.TextEngine.parserMap_.get(mimeType);
  128. goog.asserts.assert(
  129. factory, 'Text type negotiation should have happened already');
  130. this.parser_ = factory();
  131. if (this.parser_.setSequenceMode) {
  132. this.parser_.setSequenceMode(sequenceMode);
  133. } else {
  134. shaka.Deprecate.deprecateFeature(5,
  135. 'Text parsers w/ setSequenceMode',
  136. 'Text parsers should have a "setSequenceMode" method!');
  137. }
  138. if (this.parser_.setManifestType) {
  139. this.parser_.setManifestType(manifestType);
  140. } else {
  141. shaka.Deprecate.deprecateFeature(5,
  142. 'Text parsers w/ setManifestType',
  143. 'Text parsers should have a "setManifestType" method!');
  144. }
  145. this.segmentRelativeVttTiming_ = segmentRelativeVttTiming;
  146. }
  147. /** @param {shaka.extern.TextParser.ModifyCueCallback} modifyCueCallback */
  148. setModifyCueCallback(modifyCueCallback) {
  149. this.modifyCueCallback_ = modifyCueCallback;
  150. }
  151. /**
  152. * @param {BufferSource} buffer
  153. * @param {?number} startTime relative to the start of the presentation
  154. * @param {?number} endTime relative to the start of the presentation
  155. * @param {?string=} uri
  156. * @return {!Promise}
  157. */
  158. async appendBuffer(buffer, startTime, endTime, uri) {
  159. goog.asserts.assert(
  160. this.parser_, 'The parser should already be initialized');
  161. // Start the operation asynchronously to avoid blocking the caller.
  162. await Promise.resolve();
  163. // Check that TextEngine hasn't been destroyed.
  164. if (!this.parser_ || !this.displayer_) {
  165. return;
  166. }
  167. if (startTime == null || endTime == null) {
  168. this.parser_.parseInit(shaka.util.BufferUtils.toUint8(buffer));
  169. return;
  170. }
  171. const vttOffset = this.segmentRelativeVttTiming_ ?
  172. startTime : this.timestampOffset_;
  173. /** @type {shaka.extern.TextParser.TimeContext} **/
  174. const time = {
  175. periodStart: this.timestampOffset_,
  176. segmentStart: startTime,
  177. segmentEnd: endTime,
  178. vttOffset: vttOffset,
  179. };
  180. // Parse the buffer and add the new cues.
  181. const allCues = this.parser_.parseMedia(
  182. shaka.util.BufferUtils.toUint8(buffer), time, uri, /* images= */ []);
  183. for (const cue of allCues) {
  184. this.modifyCueCallback_(cue, uri || null, time);
  185. }
  186. const cuesToAppend = allCues.filter((cue) => {
  187. return cue.startTime >= this.appendWindowStart_ &&
  188. cue.startTime < this.appendWindowEnd_;
  189. });
  190. this.displayer_.append(cuesToAppend);
  191. // NOTE: We update the buffered range from the start and end times
  192. // passed down from the segment reference, not with the start and end
  193. // times of the parsed cues. This is important because some segments
  194. // may contain no cues, but we must still consider those ranges
  195. // buffered.
  196. if (this.bufferStart_ == null) {
  197. this.bufferStart_ = Math.max(startTime, this.appendWindowStart_);
  198. } else {
  199. // We already had something in buffer, and we assume we are extending
  200. // the range from the end.
  201. goog.asserts.assert(
  202. this.bufferEnd_ != null,
  203. 'There should already be a buffered range end.');
  204. goog.asserts.assert(
  205. (startTime - this.bufferEnd_) <= 1,
  206. 'There should not be a gap in text references >1s');
  207. }
  208. this.bufferEnd_ = Math.min(endTime, this.appendWindowEnd_);
  209. }
  210. /**
  211. * @param {number} startTime relative to the start of the presentation
  212. * @param {number} endTime relative to the start of the presentation
  213. * @return {!Promise}
  214. */
  215. async remove(startTime, endTime) {
  216. // Start the operation asynchronously to avoid blocking the caller.
  217. await Promise.resolve();
  218. if (this.displayer_ && this.displayer_.remove(startTime, endTime)) {
  219. if (this.bufferStart_ == null) {
  220. goog.asserts.assert(
  221. this.bufferEnd_ == null, 'end must be null if startTime is null');
  222. } else {
  223. goog.asserts.assert(
  224. this.bufferEnd_ != null,
  225. 'end must be non-null if startTime is non-null');
  226. // Update buffered range.
  227. if (endTime <= this.bufferStart_ || startTime >= this.bufferEnd_) {
  228. // No intersection. Nothing was removed.
  229. } else if (startTime <= this.bufferStart_ &&
  230. endTime >= this.bufferEnd_) {
  231. // We wiped out everything.
  232. this.bufferStart_ = this.bufferEnd_ = null;
  233. } else if (startTime <= this.bufferStart_ &&
  234. endTime < this.bufferEnd_) {
  235. // We removed from the beginning of the range.
  236. this.bufferStart_ = endTime;
  237. } else if (startTime > this.bufferStart_ &&
  238. endTime >= this.bufferEnd_) {
  239. // We removed from the end of the range.
  240. this.bufferEnd_ = startTime;
  241. } else {
  242. // We removed from the middle? StreamingEngine isn't supposed to.
  243. goog.asserts.assert(
  244. false, 'removal from the middle is not supported by TextEngine');
  245. }
  246. }
  247. }
  248. }
  249. /** @param {number} timestampOffset */
  250. setTimestampOffset(timestampOffset) {
  251. this.timestampOffset_ = timestampOffset;
  252. }
  253. /**
  254. * @param {number} appendWindowStart
  255. * @param {number} appendWindowEnd
  256. */
  257. setAppendWindow(appendWindowStart, appendWindowEnd) {
  258. this.appendWindowStart_ = appendWindowStart;
  259. this.appendWindowEnd_ = appendWindowEnd;
  260. }
  261. /**
  262. * @return {?number} Time in seconds of the beginning of the buffered range,
  263. * or null if nothing is buffered.
  264. */
  265. bufferStart() {
  266. return this.bufferStart_;
  267. }
  268. /**
  269. * @return {?number} Time in seconds of the end of the buffered range,
  270. * or null if nothing is buffered.
  271. */
  272. bufferEnd() {
  273. return this.bufferEnd_;
  274. }
  275. /**
  276. * @param {number} t A timestamp
  277. * @return {boolean}
  278. */
  279. isBuffered(t) {
  280. if (this.bufferStart_ == null || this.bufferEnd_ == null) {
  281. return false;
  282. }
  283. return t >= this.bufferStart_ && t < this.bufferEnd_;
  284. }
  285. /**
  286. * @param {number} t A timestamp
  287. * @return {number} Number of seconds ahead of 't' we have buffered
  288. */
  289. bufferedAheadOf(t) {
  290. if (this.bufferEnd_ == null || this.bufferEnd_ < t) {
  291. return 0;
  292. }
  293. goog.asserts.assert(
  294. this.bufferStart_ != null,
  295. 'start should not be null if end is not null');
  296. return this.bufferEnd_ - Math.max(t, this.bufferStart_);
  297. }
  298. /**
  299. * Set the selected closed captions id.
  300. * Append the cues stored in the closed captions map until buffer end time.
  301. * This is to fill the gap between buffered and unbuffered captions, and to
  302. * avoid duplicates that would be caused by any future video segments parsed
  303. * for captions.
  304. *
  305. * @param {string} id
  306. * @param {number} bufferEndTime Load any stored cues up to this time.
  307. */
  308. setSelectedClosedCaptionId(id, bufferEndTime) {
  309. this.selectedClosedCaptionId_ = id;
  310. const captionsMap = this.closedCaptionsMap_.get(id);
  311. if (captionsMap) {
  312. for (const startAndEndTime of captionsMap.keys()) {
  313. /** @type {Array<!shaka.text.Cue>} */
  314. const cues = captionsMap.get(startAndEndTime)
  315. .filter((c) => c.endTime <= bufferEndTime);
  316. if (cues) {
  317. this.displayer_.append(cues);
  318. }
  319. }
  320. }
  321. }
  322. /**
  323. * @param {!shaka.text.Cue} cue the cue to apply the timestamp to recursively
  324. * @param {number} videoTimestampOffset the timestamp offset of the video
  325. * @private
  326. */
  327. applyVideoTimestampOffsetRecursive_(cue, videoTimestampOffset) {
  328. cue.startTime += videoTimestampOffset;
  329. cue.endTime += videoTimestampOffset;
  330. for (const nested of cue.nestedCues) {
  331. this.applyVideoTimestampOffsetRecursive_(nested, videoTimestampOffset);
  332. }
  333. }
  334. /**
  335. * Store the closed captions in the text engine, and append the cues to the
  336. * text displayer. This is a side-channel used for embedded text only.
  337. *
  338. * @param {!Array<!shaka.extern.ICaptionDecoder.ClosedCaption>} closedCaptions
  339. * @param {?number} startTime relative to the start of the presentation
  340. * @param {?number} endTime relative to the start of the presentation
  341. * @param {number} videoTimestampOffset the timestamp offset of the video
  342. * stream in which these captions were embedded
  343. */
  344. storeAndAppendClosedCaptions(
  345. closedCaptions, startTime, endTime, videoTimestampOffset) {
  346. const startAndEndTime = startTime + ' ' + endTime;
  347. /** @type {!Map<string, !Map<string, !Array<!shaka.text.Cue>>>} */
  348. const captionsMap = new Map();
  349. for (const caption of closedCaptions) {
  350. const id = caption.stream;
  351. const cue = caption.cue;
  352. if (!captionsMap.has(id)) {
  353. captionsMap.set(id, new Map());
  354. }
  355. if (!captionsMap.get(id).has(startAndEndTime)) {
  356. captionsMap.get(id).set(startAndEndTime, []);
  357. }
  358. // Adjust CEA captions with respect to the timestamp offset of the video
  359. // stream in which they were embedded.
  360. this.applyVideoTimestampOffsetRecursive_(cue, videoTimestampOffset);
  361. const keepThisCue =
  362. cue.startTime >= this.appendWindowStart_ &&
  363. cue.startTime < this.appendWindowEnd_;
  364. if (!keepThisCue) {
  365. continue;
  366. }
  367. captionsMap.get(id).get(startAndEndTime).push(cue);
  368. if (id == this.selectedClosedCaptionId_) {
  369. this.displayer_.append([cue]);
  370. }
  371. }
  372. for (const id of captionsMap.keys()) {
  373. if (!this.closedCaptionsMap_.has(id)) {
  374. this.closedCaptionsMap_.set(id, new Map());
  375. }
  376. for (const startAndEndTime of captionsMap.get(id).keys()) {
  377. const cues = captionsMap.get(id).get(startAndEndTime);
  378. this.closedCaptionsMap_.get(id).set(startAndEndTime, cues);
  379. }
  380. }
  381. if (this.bufferStart_ == null) {
  382. this.bufferStart_ = Math.max(startTime, this.appendWindowStart_);
  383. } else {
  384. this.bufferStart_ = Math.min(
  385. this.bufferStart_, Math.max(startTime, this.appendWindowStart_));
  386. }
  387. this.bufferEnd_ = Math.max(
  388. this.bufferEnd_, Math.min(endTime, this.appendWindowEnd_));
  389. }
  390. /**
  391. * Get the number of closed caption channels.
  392. *
  393. * This function is for TESTING ONLY. DO NOT USE in the library.
  394. *
  395. * @return {number}
  396. */
  397. getNumberOfClosedCaptionChannels() {
  398. return this.closedCaptionsMap_.size;
  399. }
  400. /**
  401. * Get the number of closed caption cues for a given channel. If there is
  402. * no channel for the given channel id, this will return 0.
  403. *
  404. * This function is for TESTING ONLY. DO NOT USE in the library.
  405. *
  406. * @param {string} channelId
  407. * @return {number}
  408. */
  409. getNumberOfClosedCaptionsInChannel(channelId) {
  410. const channel = this.closedCaptionsMap_.get(channelId);
  411. return channel ? channel.size : 0;
  412. }
  413. };
/**
 * Registry of text-parser factories, keyed by MIME type.  Populated by
 * registerParser() and consulted by unregisterParser(), findParser(),
 * isTypeSupported(), and initParser().
 * @private {!Map<string, !shaka.extern.TextParserPlugin>}
 */
shaka.text.TextEngine.parserMap_ = new Map();