はじまりの大地

このコミットが含まれているのは:
2024-07-15 09:14:04 +09:00
コミット 6632905f32
3501個のファイルの変更1439465行の追加0行の削除
+102
ファイルの表示
@@ -0,0 +1,102 @@
import { SimpleLogger } from '@peertube/peertube-models'
import { buildSUUID, SUUID } from '@peertube/peertube-node-utils'
import { $ } from 'execa'
import { PerformanceObserver } from 'node:perf_hooks'
import { join } from 'path'
import { TranscriptFile, TranscriptFormat } from './transcript-file.js'
import { TranscriptionEngine } from './transcription-engine.js'
import { TranscriptionModel } from './transcription-model.js'
import { TranscriptionRun } from './transcription-run.js'
/**
 * Arguments accepted by `AbstractTranscriber.transcribe()`.
 */
export interface TranscribeArgs {
// Path of the media file handed to the transcription engine
mediaFilePath: string
// Model the engine should run
model: TranscriptionModel
// Format of the transcript file referenced by the returned TranscriptFile
format: TranscriptFormat
// Directory the engine is asked to write its output files into
transcriptDirectory: string
// Language of the media; when omitted the engine has to detect it
language?: string
// Identifier of the transcription run used for timing
runId?: SUUID
}
/**
 * Base class of every transcriber.
 *
 * It stores the engine description, resolves which binary to execute, builds the command runner
 * and keeps track of the current timed run. Subclasses implement `transcribe()` and `install()`.
 */
export abstract class AbstractTranscriber {
  engine: TranscriptionEngine

  protected binDirectory: string
  protected enginePath: string
  protected logger: SimpleLogger
  protected performanceObserver?: PerformanceObserver
  protected run?: TranscriptionRun

  /**
   * @param options.engine description of the engine driven by this transcriber
   * @param options.binDirectory directory containing the engine command
   * @param options.enginePath full path of the engine binary, takes precedence over `binDirectory`
   * @param options.logger logger receiving command output and errors
   * @param options.performanceObserver observer stored on the instance
   */
  constructor (options: {
    engine: TranscriptionEngine
    binDirectory?: string
    enginePath?: string
    logger: SimpleLogger
    performanceObserver?: PerformanceObserver
  }) {
    const { engine, logger, enginePath, binDirectory, performanceObserver } = options

    this.engine = engine
    this.enginePath = enginePath
    this.logger = logger
    this.binDirectory = binDirectory
    this.performanceObserver = performanceObserver
  }

  /**
   * Creates the run that `startRun()` and `stopRun()` operate on.
   */
  createRun (uuid: SUUID = buildSUUID()) {
    this.run = new TranscriptionRun(this.logger, uuid)
  }

  /**
   * Starts the current run.
   * @throws if `createRun()` has not been called
   */
  startRun () {
    if (!this.run) {
      throw new Error('No transcription run to start: call createRun() first.')
    }

    this.run.start()
  }

  /**
   * Stops the current run and forgets it.
   * @throws if `createRun()` has not been called
   */
  stopRun () {
    if (!this.run) {
      throw new Error('No transcription run to stop: call createRun() first.')
    }

    this.run.stop()
    delete this.run
  }

  /**
   * @throws if no language is given and the engine cannot detect it
   */
  assertLanguageDetectionAvailable (language?: string) {
    if (!this.engine.languageDetection && !language) {
      throw new Error(`Language detection isn't available in ${this.engine.name}. A language must be provided explicitly.`)
    }
  }

  /**
   * Tells whether this transcriber can run the given model.
   */
  supports (model: TranscriptionModel) {
    return model.format === 'PyTorch'
  }

  /**
   * Resolves the binary to execute: explicit engine path first, then the engine command inside
   * the bin directory, then the bare engine command.
   */
  protected getEngineBinary () {
    if (this.enginePath) return this.enginePath
    if (this.binDirectory) return join(this.binDirectory, this.engine.command)

    return this.engine.command
  }

  /**
   * Builds a command runner that forwards its verbose events to the logger.
   * @param env environment variables passed to the runner
   */
  protected getExec (env?: { [ id: string ]: string }) {
    const logLevels = {
      command: 'debug',
      output: 'debug',
      ipc: 'debug',
      error: 'error',
      duration: 'debug'
    }

    return $({
      verbose: (_verboseLine, { message, ...verboseObject }) => {
        // An event type missing from the map above must not crash the logging callback
        const level = logLevels[verboseObject.type] ?? 'debug'

        this.logger[level](message, verboseObject)
      },
      env
    })
  }

  abstract transcribe (options: TranscribeArgs): Promise<TranscriptFile>

  abstract install (path: string): Promise<void>
}
+12
ファイルの表示
@@ -0,0 +1,12 @@
import { TranscriberFactory } from './transcriber-factory.js'
import { engines } from './whisper/index.js'
export * from './abstract-transcriber.js'
export * from './transcript-file.js'
export * from './subtitle.js'
export * from './transcription-engine.js'
export * from './transcription-model.js'
export * from './transcription-run.js'
export * from './whisper/index.js'
// Ready-to-use factory instance built from the engines exported by './whisper/index.js'
export const transcriberFactory = new TranscriberFactory(engines)
+1
ファイルの表示
@@ -0,0 +1 @@
/**
 * Removes the cue counters and timestamp lines from SRT content, keeping only the subtitle text.
 *
 * Line endings are normalized to `\n` first, so content using `\r\n` (or lone `\r`) is stripped too
 * instead of being returned untouched.
 */
export const srtToTxt = (srtContent: string) => srtContent
  .replace(/\r\n?/g, '\n')
  .replace(/^\n*\d+\n\d{2}:\d{2}:\d{2},\d{3} --> \d{2}:\d{2}:\d{2},\d{3}\n/gm, '')
+47
ファイルの表示
@@ -0,0 +1,47 @@
import { SimpleLogger } from '@peertube/peertube-models'
import { TranscriptionEngine, TranscriptionEngineName } from './transcription-engine.js'
import { Ctranslate2Transcriber, OpenaiTranscriber } from './whisper/index.js'
/**
 * Builds transcribers for the engines it has been given.
 */
export class TranscriberFactory {
  engines: TranscriptionEngine[]

  constructor (engines: TranscriptionEngine[]) {
    this.engines = engines
  }

  /**
   * Instantiates the transcriber implementing the requested engine.
   *
   * The options are forwarded to the transcriber constructor together with the matching engine description.
   *
   * @throws if the engine is not part of `engines` or has no transcriber implementation
   */
  createFromEngineName (options: {
    engineName: TranscriptionEngineName
    enginePath?: string
    binDirectory?: string
    logger: SimpleLogger
  }) {
    const { engineName } = options

    const transcriberArgs = {
      ...options,
      engine: this.getEngineByName(engineName)
    }

    switch (engineName) {
      case 'openai-whisper':
        return new OpenaiTranscriber(transcriberArgs)

      case 'whisper-ctranslate2':
        return new Ctranslate2Transcriber(transcriberArgs)

      default:
        throw new Error(`Unimplemented engine ${engineName}`)
    }
  }

  /**
   * Finds an engine description by name.
   * @throws if no engine has this name
   */
  getEngineByName (engineName: string) {
    const engine = this.engines.find(({ name }) => name === engineName)

    if (!engine) {
      throw new Error(`Unknown engine ${engineName}`)
    }

    return engine
  }
}
+70
ファイルの表示
@@ -0,0 +1,70 @@
import assert from 'node:assert'
import { readFile, writeFile } from 'node:fs/promises'
import { extname } from 'node:path'
import { srtToTxt } from './subtitle.js'
// File formats a transcript can be stored in
export type TranscriptFormat = 'txt' | 'vtt' | 'srt' | 'json'
/**
 * A transcript stored on disk, together with its language and format.
 */
export class TranscriptFile {
  path: string
  language: string
  format: TranscriptFormat = 'vtt'

  constructor ({ path, language, format = 'vtt' }: { path: string, language: string, format?: TranscriptFormat }) {
    this.path = path
    this.language = language
    this.format = format
  }

  /**
   * Asynchronously reads the entire contents of a transcript file.
   * @see https://nodejs.org/docs/latest-v18.x/api/fs.html#filehandlereadfileoptions for options
   */
  async read (options: Parameters<typeof readFile>[1] = 'utf8') {
    return readFile(this.path, options)
  }

  /**
   * Builds a transcript file from a path, guessing the format from the file extension.
   *
   * The extension is matched case-insensitively, so `.VTT` is accepted like `.vtt`.
   *
   * @throws if the extension is not one of txt, vtt or srt
   */
  static fromPath (path: string, language = 'en') {
    const format = extname(path).substring(1).toLowerCase()
    const guessableFormats = [ 'txt', 'vtt', 'srt' ]

    assert(
      guessableFormats.includes(format),
      `Couldn't guess transcript format from extension "${format}". Valid formats are: ${guessableFormats.join(', ')}.`
    )

    return new TranscriptFile({ path, language, format: format as TranscriptFormat })
  }

  /**
   * Write a transcript file to disk.
   */
  static async write ({
    path,
    content,
    language = 'en',
    format = 'vtt'
  }: { path: string, content: string, language?: string, format?: TranscriptFormat }): Promise<TranscriptFile> {
    await writeFile(path, content)

    return new TranscriptFile({ path, language, format })
  }

  /**
   * Compares the language, then the content, of two transcript files.
   * @param caseSensitive when false, contents are compared lower-cased
   */
  async equals (transcript: TranscriptFile, caseSensitive: boolean = true) {
    if (this.language !== transcript.language) {
      return false
    }

    const content = await this.read()
    const transcriptContent = await transcript.read()

    if (!caseSensitive) {
      return String(content).toLowerCase() === String(transcriptContent).toLowerCase()
    }

    return content === transcriptContent
  }

  /**
   * Reads the file and passes its content through `srtToTxt`, whatever the format of the file is.
   */
  async readAsTxt () {
    return srtToTxt(String(await this.read()))
  }
}
+16
ファイルの表示
@@ -0,0 +1,16 @@
import { ModelFormat } from './transcription-model.js'
// Names of the transcription engines that can be described by a TranscriptionEngine
export type TranscriptionEngineName = 'openai-whisper' | 'whisper-ctranslate2'
/**
 * Description of a transcription engine.
 */
export interface TranscriptionEngine {
// Identifier of the engine
name: TranscriptionEngineName
// Free-form description of the engine
description?: string
// NOTE(review): presumably the programming language the engine is written in ('python' for the declared engines) - confirm
language?: string
// Only engines run as a binary are described for now
type: 'binary'
// Name of the command to execute
command: string
// Version of the engine; transcribers pin it when installing the engine with pip
version: string
license?: string
// URL of the engine's source repository
forgeURL?: string
supportedModelFormats: ModelFormat[]
// Set when the engine can detect the language itself, so callers may omit it
languageDetection?: true
}
+34
ファイルの表示
@@ -0,0 +1,34 @@
import assert from 'node:assert'
import { stat } from 'node:fs/promises'
import { parse } from 'node:path'
// Formats a transcription model can be stored in; the trailing list names formats that are not part of the union
export type ModelFormat = 'PyTorch' | 'GGML' | 'ONNX' | 'CTranslate2' // CoreML, OpenVino, Scikit-Learn, TensorFlow/Keras, PySpark
/**
 * A transcription model, identified by its name and optionally by a path on disk and a format.
 */
export class TranscriptionModel {
  name: string
  format?: ModelFormat
  path?: string

  // # - hparams
  // # - Number of dimensions (int)
  // # - Name length (int)
  // # - Dimensions (int[n_dims])
  // # - Name (char[name_length])
  // # - Data (float[n_dims])
  // # - mel filters
  // # - tokenizer vocab
  // # - model variables

  constructor (name: string, path?: string, format?: ModelFormat) {
    this.name = name
    this.path = path
    this.format = format
  }

  /**
   * Builds a model from a path on disk, named after the last path segment without its extension.
   *
   * @throws an assertion error if nothing exists at `path`; any other `stat` failure is rethrown as is
   */
  static async fromPath (path: string) {
    try {
      await stat(path)
    } catch (err) {
      // stat() rejects on a missing path instead of resolving to a falsy value,
      // so the "doesn't exist" message has to be raised from here
      if ((err as NodeJS.ErrnoException)?.code === 'ENOENT') {
        assert.fail(`${path} doesn't exist.`)
      }

      throw err
    }

    return new TranscriptionModel(parse(path).name, path)
  }
}
+41
ファイルの表示
@@ -0,0 +1,41 @@
import { SimpleLogger } from '@peertube/peertube-models'
import { buildSUUID, SUUID } from '@peertube/peertube-node-utils'
/**
 * Times a transcription with the global `performance` API.
 *
 * `start()` and `stop()` set two marks named after the run id, and `stop()` records a measure
 * named with the run id between them.
 */
export class TranscriptionRun {
  uuid: SUUID
  logger: SimpleLogger

  constructor (logger: SimpleLogger, uuid: SUUID = buildSUUID()) {
    this.uuid = uuid
    this.logger = logger
  }

  get runId () {
    return this.uuid
  }

  /**
   * Sets the start mark of the run.
   */
  start () {
    performance.mark(this.getStartPerformanceMarkName())
  }

  /**
   * Sets the end mark and records the measure of the run.
   *
   * Failures (for example a run that was never started) are logged and not rethrown.
   * Both marks are then removed so they don't pile up in the performance timeline;
   * the measure itself is left in place.
   */
  stop () {
    const startMark = this.getStartPerformanceMarkName()
    const endMark = this.getEndPerformanceMarkName()

    try {
      performance.mark(endMark)
      performance.measure(this.runId, startMark, endMark)
    } catch (err) {
      const message = err instanceof Error ? err.message : String(err)

      this.logger.error(message, { err })
    } finally {
      performance.clearMarks(startMark)
      performance.clearMarks(endMark)
    }
  }

  getStartPerformanceMarkName () {
    return `${this.runId}-started`
  }

  getEndPerformanceMarkName () {
    return `${this.runId}-ended`
  }
}
+28
ファイルの表示
@@ -0,0 +1,28 @@
import { TranscriptionEngine } from '../transcription-engine.js'
// Descriptions of the available Whisper engines.
// `command` is the executable name and `version` is the release pinned by the transcribers' pip install.
export const engines: TranscriptionEngine[] = [
{
name: 'openai-whisper',
description: 'High-performance inference of OpenAI\'s Whisper automatic speech recognition model',
language: 'python',
type: 'binary',
command: 'whisper',
forgeURL: 'https://github.com/openai/whisper',
license: 'MIT',
supportedModelFormats: [ 'PyTorch' ],
languageDetection: true,
version: '20231117'
},
{
name: 'whisper-ctranslate2',
description: 'Whisper command line client compatible with original OpenAI client based on CTranslate2.',
language: 'python',
type: 'binary',
command: 'whisper-ctranslate2',
forgeURL: 'https://github.com/Softcatala/whisper-ctranslate2',
license: 'MIT',
supportedModelFormats: [ 'CTranslate2' ],
languageDetection: true,
version: '0.4.4'
}
]
+3
ファイルの表示
@@ -0,0 +1,3 @@
export * from './transcriber/index.js'
export * from './engines.js'
export * from './whisper-builtin-model.js'
+68
ファイルの表示
@@ -0,0 +1,68 @@
import { buildSUUID } from '@peertube/peertube-node-utils'
import assert from 'node:assert'
import { lstat } from 'node:fs/promises'
import { TranscribeArgs } from '../../abstract-transcriber.js'
import { TranscriptFile } from '../../transcript-file.js'
import { TranscriptionModel } from '../../transcription-model.js'
import { WhisperBuiltinModel } from '../whisper-builtin-model.js'
import { OpenaiTranscriber } from './openai-transcriber.js'
/**
 * Transcriber driving the `whisper-ctranslate2` command line client.
 */
export class Ctranslate2Transcriber extends OpenaiTranscriber {

  /**
   * Runs the engine on a media file and returns the transcript file in the requested format.
   *
   * The model defaults to the built-in `tiny` model. When the model has a path, it must be a directory.
   * The run is always stopped, even when the engine process fails.
   *
   * @throws if no language is given and the engine cannot detect it, if the model path is not a directory,
   * or if the engine process fails
   */
  async transcribe ({
    mediaFilePath,
    model = new WhisperBuiltinModel('tiny'),
    language,
    format,
    transcriptDirectory,
    runId = buildSUUID()
  }: TranscribeArgs): Promise<TranscriptFile> {
    this.assertLanguageDetectionAvailable(language)

    const $$ = this.getExec(this.getExecEnv())

    if (model.path) {
      const stats = await lstat(model.path)

      assert(stats.isDirectory(), 'Model path must be a path to a directory.')
    }

    const modelArgs = model.path ? [ '--model_directory', model.path ] : [ '--model', model.name ]
    const languageArgs = language ? [ '--language', language ] : []

    this.createRun(runId)
    this.startRun()

    try {
      await $$`${this.getEngineBinary()} ${[
        mediaFilePath,
        ...modelArgs,
        '--word_timestamps',
        'True',
        '--vad_filter',
        'true',
        // Better precision with 5s of audio
        // vad_filter is mainly used to improve language detection: it relies on the first 30 seconds of the video,
        // so a video without voice at the beginning is problematic
        '--vad_min_silence_duration_ms',
        '5000',
        '--output_format',
        'all',
        '--output_dir',
        transcriptDirectory,
        ...languageArgs
      ]}`
    } finally {
      // Don't leave the run open when the engine process fails
      this.stopRun()
    }

    return new TranscriptFile({
      language: language || await this.getDetectedLanguage(transcriptDirectory, mediaFilePath),
      path: this.getTranscriptFilePath(transcriptDirectory, mediaFilePath, format),
      format
    })
  }

  /**
   * Tells whether this transcriber can run the given model.
   */
  supports (model: TranscriptionModel) {
    return model.format === 'CTranslate2'
  }

  /**
   * Installs the pinned engine version with pip into the given directory.
   */
  async install (directory: string) {
    const $$ = this.getExec()

    await $$`pip3 install -U -t ${directory} whisper-ctranslate2==${this.engine.version}`
  }
}
+2
ファイルの表示
@@ -0,0 +1,2 @@
export * from './ctranslate2-transcriber.js'
export * from './openai-transcriber.js'
+77
ファイルの表示
@@ -0,0 +1,77 @@
import { buildSUUID } from '@peertube/peertube-node-utils'
import { readJSON } from 'fs-extra/esm'
import { parse } from 'node:path'
import { join, resolve } from 'path'
import { AbstractTranscriber, TranscribeArgs } from '../../abstract-transcriber.js'
import { TranscriptFile, TranscriptFormat } from '../../transcript-file.js'
/**
 * Transcriber driving the `whisper` command line client.
 */
export class OpenaiTranscriber extends AbstractTranscriber {

  /**
   * Runs the engine on a media file and returns the transcript file in the requested format.
   *
   * The engine is asked to write every output format into `transcriptDirectory`.
   * The run is always stopped, even when the engine process fails.
   *
   * @throws if no language is given and the engine cannot detect it, or if the engine process fails
   */
  async transcribe ({
    mediaFilePath,
    model,
    language,
    format,
    transcriptDirectory,
    runId = buildSUUID()
  }: TranscribeArgs): Promise<TranscriptFile> {
    this.assertLanguageDetectionAvailable(language)

    const $$ = this.getExec(this.getExecEnv())
    const languageArgs = language ? [ '--language', language ] : []

    this.createRun(runId)
    this.startRun()

    try {
      await $$`${this.getEngineBinary()} ${[
        mediaFilePath,
        '--word_timestamps',
        'True',
        '--model',
        // A model stored on disk is referenced by its path, a built-in one by its name
        model.path || model.name,
        '--output_format',
        'all',
        '--output_dir',
        transcriptDirectory,
        ...languageArgs
      ]}`
    } finally {
      // Don't leave the run open when the engine process fails
      this.stopRun()
    }

    return new TranscriptFile({
      language: language || await this.getDetectedLanguage(transcriptDirectory, mediaFilePath),
      path: this.getTranscriptFilePath(transcriptDirectory, mediaFilePath, format),
      format
    })
  }

  // ---------------------------------------------------------------------------

  /**
   * Returns the `language` field of the JSON transcript written for the media file.
   */
  protected async getDetectedLanguage (transcriptDirectory: string, mediaFilePath: string) {
    const { language } = await this.readJsonTranscriptFile(transcriptDirectory, mediaFilePath)

    return language
  }

  /**
   * Reads and parses the JSON transcript written for the media file.
   */
  protected async readJsonTranscriptFile (transcriptDirectory: string, mediaFilePath: string) {
    return readJSON(this.getTranscriptFilePath(transcriptDirectory, mediaFilePath, 'json'), 'utf8')
  }

  /**
   * Builds the transcript path: the media file name without its extension, plus the format as extension.
   */
  protected getTranscriptFilePath (transcriptDirectory: string, mediaFilePath: string, format: TranscriptFormat) {
    return join(transcriptDirectory, `${parse(mediaFilePath).name}.${format}`)
  }

  // ---------------------------------------------------------------------------

  /**
   * Installs the pinned engine version with pip into the given directory.
   */
  async install (directory: string) {
    const $$ = this.getExec()

    await $$`pip3 install -U -t ${directory} openai-whisper==${this.engine.version}`
  }

  /**
   * Environment of the engine process: when a bin directory is set, PYTHONPATH points to its parent directory.
   * NOTE(review): presumably so Python finds the packages installed by `install()` - confirm.
   */
  protected getExecEnv () {
    if (!this.binDirectory) return undefined

    return { PYTHONPATH: resolve(this.binDirectory, '../') }
  }
}
+11
ファイルの表示
@@ -0,0 +1,11 @@
import { TranscriptionModel } from '../transcription-model.js'
// Names accepted by the WhisperBuiltinModel constructor
export type WhisperBuiltinModelName = 'tiny' | 'base' | 'small' | 'medium' | 'large' | 'large-v2' | 'large-v3'
/**
 * A transcription model identified only by one of the built-in Whisper model names: no path or format is set.
 */
export class WhisperBuiltinModel extends TranscriptionModel {
// The constructor only exists to restrict `name` to the built-in model names
// eslint-disable-next-line @typescript-eslint/no-useless-constructor
constructor (name: WhisperBuiltinModelName) {
super(name)
}
}