ニジカ投稿局 https://tv.nizika.tv
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 

182 lines
6.5 KiB

  1. import { hasAudioStream } from '@peertube/peertube-ffmpeg'
  2. import { buildSUUID } from '@peertube/peertube-node-utils'
  3. import { AbstractTranscriber, TranscriptionModel, WhisperBuiltinModel, transcriberFactory } from '@peertube/peertube-transcription'
  4. import { moveAndProcessCaptionFile } from '@server/helpers/captions-utils.js'
  5. import { isVideoCaptionLanguageValid } from '@server/helpers/custom-validators/video-captions.js'
  6. import { logger, loggerTagsFactory } from '@server/helpers/logger.js'
  7. import { CONFIG } from '@server/initializers/config.js'
  8. import { DIRECTORIES } from '@server/initializers/constants.js'
  9. import { sequelizeTypescript } from '@server/initializers/database.js'
  10. import { VideoCaptionModel } from '@server/models/video/video-caption.js'
  11. import { VideoJobInfoModel } from '@server/models/video/video-job-info.js'
  12. import { VideoModel } from '@server/models/video/video.js'
  13. import { MVideo, MVideoCaption, MVideoFullLight, MVideoUUID, MVideoUrl } from '@server/types/models/index.js'
  14. import { MutexInterface } from 'async-mutex'
  15. import { ensureDir, remove } from 'fs-extra/esm'
  16. import { join } from 'path'
  17. import { federateVideoIfNeeded } from './activitypub/videos/federate.js'
  18. import { JobQueue } from './job-queue/job-queue.js'
  19. import { Notifier } from './notifier/notifier.js'
  20. import { TranscriptionJobHandler } from './runners/index.js'
  21. import { VideoPathManager } from './video-path-manager.js'
// Logger tags attached to every log line emitted by this module
const lTags = loggerTagsFactory('video-caption')
  23. export async function createLocalCaption (options: {
  24. video: MVideo
  25. path: string
  26. language: string
  27. automaticallyGenerated: boolean
  28. }) {
  29. const { language, path, video, automaticallyGenerated } = options
  30. const videoCaption = new VideoCaptionModel({
  31. videoId: video.id,
  32. filename: VideoCaptionModel.generateCaptionName(language),
  33. language,
  34. automaticallyGenerated
  35. }) as MVideoCaption
  36. await moveAndProcessCaptionFile({ path }, videoCaption)
  37. await sequelizeTypescript.transaction(async t => {
  38. await VideoCaptionModel.insertOrReplaceLanguage(videoCaption, t)
  39. })
  40. return Object.assign(videoCaption, { Video: video })
  41. }
  42. export async function createTranscriptionTaskIfNeeded (video: MVideoUUID & MVideoUrl) {
  43. if (CONFIG.VIDEO_TRANSCRIPTION.ENABLED !== true) return
  44. logger.info(`Creating transcription job for ${video.url}`, lTags(video.uuid))
  45. if (CONFIG.VIDEO_TRANSCRIPTION.REMOTE_RUNNERS.ENABLED === true) {
  46. await new TranscriptionJobHandler().create({ video })
  47. } else {
  48. await JobQueue.Instance.createJob({ type: 'video-transcription', payload: { videoUUID: video.uuid } })
  49. }
  50. await VideoJobInfoModel.increaseOrCreate(video.uuid, 'pendingTranscription')
  51. }
// ---------------------------------------------------------------------------
// Transcription task
// ---------------------------------------------------------------------------

// Module-level transcriber instance: created (and, if needed, installed) on
// the first generateSubtitle() call, then reused by subsequent calls
let transcriber: AbstractTranscriber
/**
 * Generate a VTT subtitle for the video using the configured transcription
 * engine, then hand the result to onTranscriptionEnded().
 *
 * Lifecycle: create a temp output dir -> lazily build/install the transcriber
 * -> lock the video files -> run transcription on the max-quality file ->
 * always clean up the temp dir, release the lock and decrease the
 * `pendingTranscription` counter in the `finally` block.
 */
export async function generateSubtitle (options: {
  video: MVideoUUID
}) {
  // Per-run temp directory; removed unconditionally in the finally block
  const outputPath = join(CONFIG.STORAGE.TMP_DIR, 'transcription', buildSUUID())

  let inputFileMutexReleaser: MutexInterface.Releaser

  try {
    await ensureDir(outputPath)

    const binDirectory = join(DIRECTORIES.LOCAL_PIP_DIRECTORY, 'bin')

    // Lazy load the transcriber
    if (!transcriber) {
      transcriber = transcriberFactory.createFromEngineName({
        engineName: CONFIG.VIDEO_TRANSCRIPTION.ENGINE,
        enginePath: CONFIG.VIDEO_TRANSCRIPTION.ENGINE_PATH,
        logger,
        binDirectory
      })

      // No explicit engine path configured -> install the engine ourselves
      // into the local pip directory (one-time setup)
      if (!CONFIG.VIDEO_TRANSCRIPTION.ENGINE_PATH) {
        logger.info(`Installing transcriber ${transcriber.engine.name} to generate subtitles`, lTags())
        await transcriber.install(DIRECTORIES.LOCAL_PIP_DIRECTORY)
      }
    }

    // Lock the video files so they cannot be moved/deleted while we read them
    inputFileMutexReleaser = await VideoPathManager.Instance.lockFiles(options.video.uuid)

    const video = await VideoModel.loadFull(options.video.uuid)
    const file = video.getMaxQualityFile().withVideoOrPlaylist(video)

    await VideoPathManager.Instance.makeAvailableVideoFile(file, async videoInputPath => {
      // Nothing to transcribe without audio
      if (await hasAudioStream(videoInputPath) !== true) {
        logger.info(
          `Do not run transcription for ${video.uuid} in ${outputPath} because it does not contain an audio stream`,
          lTags(video.uuid)
        )

        return
      }

      // Release input file mutex now we are going to run the command
      // NOTE(review): the finally block may call the releaser a second time
      // after this timeout fires — presumably async-mutex tolerates a double
      // release here; confirm against the async-mutex version in use
      setTimeout(() => inputFileMutexReleaser(), 1000)

      logger.info(`Running transcription for ${video.uuid} in ${outputPath}`, lTags(video.uuid))

      const transcriptFile = await transcriber.transcribe({
        mediaFilePath: videoInputPath,

        // Explicit model path wins over the built-in Whisper model name
        model: CONFIG.VIDEO_TRANSCRIPTION.MODEL_PATH
          ? await TranscriptionModel.fromPath(CONFIG.VIDEO_TRANSCRIPTION.MODEL_PATH)
          : new WhisperBuiltinModel(CONFIG.VIDEO_TRANSCRIPTION.MODEL),

        transcriptDirectory: outputPath,

        format: 'vtt'
      })

      await onTranscriptionEnded({ video, language: transcriptFile.language, vttPath: transcriptFile.path })
    })
  } finally {
    // Cleanup runs on success and on failure alike
    if (outputPath) await remove(outputPath)
    if (inputFileMutexReleaser) inputFileMutexReleaser()

    // Fire-and-forget counter decrease; failures are only logged
    VideoJobInfoModel.decrease(options.video.uuid, 'pendingTranscription')
      .catch(err => logger.error('Cannot decrease pendingTranscription job count', { err, ...lTags(options.video.uuid) }))
  }
}
  108. export async function onTranscriptionEnded (options: {
  109. video: MVideoFullLight
  110. language: string
  111. vttPath: string
  112. lTags?: (string | number)[]
  113. }) {
  114. const { video, language, vttPath, lTags: customLTags = [] } = options
  115. if (!isVideoCaptionLanguageValid(language)) {
  116. logger.warn(`Invalid transcription language for video ${video.uuid}`, lTags(video.uuid))
  117. return
  118. }
  119. if (!video.language) {
  120. video.language = language
  121. await video.save()
  122. }
  123. const existing = await VideoCaptionModel.loadByVideoIdAndLanguage(video.id, language)
  124. if (existing && !existing.automaticallyGenerated) {
  125. logger.info(
  126. // eslint-disable-next-line max-len
  127. `Do not replace existing caption for video ${video.uuid} after transcription (subtitle may have been added while during the transcription process)`,
  128. lTags(video.uuid)
  129. )
  130. return
  131. }
  132. const caption = await createLocalCaption({
  133. video,
  134. language,
  135. path: vttPath,
  136. automaticallyGenerated: true
  137. })
  138. await sequelizeTypescript.transaction(async t => {
  139. await federateVideoIfNeeded(video, false, t)
  140. })
  141. Notifier.Instance.notifyOfGeneratedVideoTranscription(caption)
  142. logger.info(`Transcription ended for ${video.uuid}`, lTags(video.uuid, ...customLTags))
  143. }