はじまりの大地

このコミットが含まれているのは:
2024-07-15 09:14:04 +09:00
コミット 6632905f32
3501個のファイルの変更、1439465行の追加、0行の削除
+28
ファイルの表示
@@ -0,0 +1,28 @@
import { TranscriptionEngine } from '../transcription-engine.js'
// Metadata describing the original OpenAI Whisper command line client.
const openaiWhisperEngine: TranscriptionEngine = {
  name: 'openai-whisper',
  description: 'High-performance inference of OpenAI\'s Whisper automatic speech recognition model',
  language: 'python',
  type: 'binary',
  command: 'whisper',
  forgeURL: 'https://github.com/openai/whisper',
  license: 'MIT',
  supportedModelFormats: [ 'PyTorch' ],
  languageDetection: true,
  version: '20231117'
}

// Metadata describing the CTranslate2 based Whisper command line client.
const whisperCtranslate2Engine: TranscriptionEngine = {
  name: 'whisper-ctranslate2',
  description: 'Whisper command line client compatible with original OpenAI client based on CTranslate2.',
  language: 'python',
  type: 'binary',
  command: 'whisper-ctranslate2',
  forgeURL: 'https://github.com/Softcatala/whisper-ctranslate2',
  license: 'MIT',
  supportedModelFormats: [ 'CTranslate2' ],
  languageDetection: true,
  version: '0.4.4'
}

/**
 * Transcription engines declared by this module, in declaration order:
 * `openai-whisper` first, then `whisper-ctranslate2`.
 */
export const engines: TranscriptionEngine[] = [
  openaiWhisperEngine,
  whisperCtranslate2Engine
]
+3
ファイルの表示
@@ -0,0 +1,3 @@
// Barrel file: re-exports everything from the transcriber barrel,
// the engines module and the Whisper built-in model module.
export * from './transcriber/index.js'
export * from './engines.js'
export * from './whisper-builtin-model.js'
+68
ファイルの表示
@@ -0,0 +1,68 @@
import { buildSUUID } from '@peertube/peertube-node-utils'
import assert from 'node:assert'
import { lstat } from 'node:fs/promises'
import { TranscribeArgs } from '../../abstract-transcriber.js'
import { TranscriptFile } from '../../transcript-file.js'
import { TranscriptionModel } from '../../transcription-model.js'
import { WhisperBuiltinModel } from '../whisper-builtin-model.js'
import { OpenaiTranscriber } from './openai-transcriber.js'
/**
 * Transcriber that drives the `whisper-ctranslate2` command line client.
 *
 * Run bookkeeping, exec creation and transcript path helpers are inherited
 * from OpenaiTranscriber.
 */
export class Ctranslate2Transcriber extends OpenaiTranscriber {
  /**
   * Runs the engine binary on `mediaFilePath` and returns the transcript file
   * matching `format` inside `transcriptDirectory`.
   *
   * When `model.path` is set it must point to a directory, otherwise the
   * assertion fails. When `language` is not provided, the language is taken
   * from `getDetectedLanguage()` after the run.
   */
  async transcribe ({
    mediaFilePath,
    model = new WhisperBuiltinModel('tiny'),
    language,
    format,
    transcriptDirectory,
    runId = buildSUUID()
  }: TranscribeArgs): Promise<TranscriptFile> {
    this.assertLanguageDetectionAvailable(language)

    const exec = this.getExec(this.getExecEnv())

    // A model given by path is passed as a directory, a model without path is passed by name
    let modelArgs: string[]
    if (model.path) {
      const stats = await lstat(model.path)
      assert(stats.isDirectory(), 'Model path must be a path to a directory.')

      modelArgs = [ '--model_directory', model.path ]
    } else {
      modelArgs = [ '--model', model.name ]
    }

    const cliArgs = [
      mediaFilePath,
      ...modelArgs,
      '--word_timestamps',
      'True',
      '--vad_filter',
      'true',
      // 5s of audio gives a better precision
      // vad_filter is mainly there to improve language detection
      // (it uses the first 30 seconds of the video, so a start without voice is problematic)
      '--vad_min_silence_duration_ms',
      '5000',
      '--output_format',
      'all',
      '--output_dir',
      transcriptDirectory
    ]

    // The language flag goes last and only when a language was requested
    if (language) cliArgs.push('--language', language)

    this.createRun(runId)
    this.startRun()
    await exec`${this.getEngineBinary()} ${cliArgs}`
    this.stopRun()

    const transcriptLanguage = language || await this.getDetectedLanguage(transcriptDirectory, mediaFilePath)
    const path = this.getTranscriptFilePath(transcriptDirectory, mediaFilePath, format)

    return new TranscriptFile({ language: transcriptLanguage, path, format })
  }

  /** Tells whether `model` is in the `CTranslate2` format. */
  supports (model: TranscriptionModel) {
    const { format } = model

    return format === 'CTranslate2'
  }

  /** Installs the pinned `whisper-ctranslate2` version into `directory` using `pip3`. */
  async install (directory: string) {
    const exec = this.getExec()

    await exec`pip3 install -U -t ${directory} whisper-ctranslate2==${this.engine.version}`
  }
}
+2
ファイルの表示
@@ -0,0 +1,2 @@
// Barrel file: re-exports everything from both transcriber modules.
export * from './ctranslate2-transcriber.js'
export * from './openai-transcriber.js'
+77
ファイルの表示
@@ -0,0 +1,77 @@
import { buildSUUID } from '@peertube/peertube-node-utils'
import { readJSON } from 'fs-extra/esm'
import { parse } from 'node:path'
import { join, resolve } from 'path'
import { AbstractTranscriber, TranscribeArgs } from '../../abstract-transcriber.js'
import { TranscriptFile, TranscriptFormat } from '../../transcript-file.js'
/**
 * Transcriber that drives the `openai-whisper` command line client.
 */
export class OpenaiTranscriber extends AbstractTranscriber {
  /**
   * Runs the engine binary on `mediaFilePath` and returns the transcript file
   * matching `format` inside `transcriptDirectory`.
   *
   * The binary is asked to write every output format (`--output_format all`).
   * When `language` is not provided, the language is read from the JSON
   * transcript written by the run.
   *
   * @param options.model model to use: its `path` is preferred over its `name`.
   * Falls back to the `tiny` model name when omitted.
   * @param options.runId identifier of the run, a generated short UUID by default.
   */
  async transcribe ({
    mediaFilePath,
    model,
    language,
    format,
    transcriptDirectory,
    runId = buildSUUID()
  }: TranscribeArgs): Promise<TranscriptFile> {
    this.assertLanguageDetectionAvailable(language)

    const $$ = this.getExec(this.getExecEnv())

    // `model` can be omitted by the caller: dereferencing it unconditionally threw a TypeError.
    // Use the same default model name as Ctranslate2Transcriber in that case.
    const modelArg = model
      ? model.path || model.name
      : 'tiny'

    const languageArgs = language ? [ '--language', language ] : []

    this.createRun(runId)
    this.startRun()
    await $$`${this.getEngineBinary()} ${[
      mediaFilePath,
      '--word_timestamps',
      'True',
      '--model',
      modelArg,
      '--output_format',
      'all',
      '--output_dir',
      transcriptDirectory,
      ...languageArgs
    ]}`
    this.stopRun()

    return new TranscriptFile({
      language: language || await this.getDetectedLanguage(transcriptDirectory, mediaFilePath),
      path: this.getTranscriptFilePath(transcriptDirectory, mediaFilePath, format),
      format
    })
  }

  // ---------------------------------------------------------------------------

  /** Returns the `language` field of the JSON transcript written for `mediaFilePath`. */
  protected async getDetectedLanguage (transcriptDirectory: string, mediaFilePath: string) {
    const { language } = await this.readJsonTranscriptFile(transcriptDirectory, mediaFilePath)

    return language
  }

  /** Reads and parses the `.json` transcript written for `mediaFilePath`. */
  protected async readJsonTranscriptFile (transcriptDirectory: string, mediaFilePath: string) {
    return readJSON(this.getTranscriptFilePath(transcriptDirectory, mediaFilePath, 'json'), 'utf8')
  }

  /**
   * Builds the transcript path: the media file name without its extension,
   * suffixed by `format`, inside `transcriptDirectory`.
   */
  protected getTranscriptFilePath (transcriptDirectory: string, mediaFilePath: string, format: TranscriptFormat) {
    return join(transcriptDirectory, `${parse(mediaFilePath).name}.${format}`)
  }

  // ---------------------------------------------------------------------------

  /** Installs the pinned `openai-whisper` version into `directory` using `pip3`. */
  async install (directory: string) {
    const $$ = this.getExec()

    await $$`pip3 install -U -t ${[ directory ]} openai-whisper==${this.engine.version}`
  }

  /**
   * Environment passed to the exec helper: `PYTHONPATH` set to the parent of
   * `binDirectory`, or `undefined` when no `binDirectory` is set.
   */
  protected getExecEnv () {
    if (!this.binDirectory) return undefined

    // NOTE(review): presumably lets Python find packages installed with `pip3 install -t` - confirm
    return { PYTHONPATH: resolve(this.binDirectory, '../') }
  }
}
+11
ファイルの表示
@@ -0,0 +1,11 @@
import { TranscriptionModel } from '../transcription-model.js'
/** Model names accepted by the WhisperBuiltinModel constructor. */
export type WhisperBuiltinModelName =
  | 'tiny'
  | 'base'
  | 'small'
  | 'medium'
  | 'large'
  | 'large-v2'
  | 'large-v3'
/**
 * Transcription model designated by one of the built-in Whisper model names.
 *
 * The constructor only restricts the accepted `name` values before handing
 * the name over to TranscriptionModel.
 */
export class WhisperBuiltinModel extends TranscriptionModel {

  // Kept on purpose: it narrows the type of `name`
  // eslint-disable-next-line @typescript-eslint/no-useless-constructor
  constructor (name: WhisperBuiltinModelName) {
    super(name)
  }
}