このコミットが含まれているのは:
2026-06-14 00:01:03 +09:00
コミット 3b5ad3b805
10個のファイルの変更3046行の追加387行の削除
+67 -10
ファイルの表示
@@ -13,6 +13,7 @@ export type GekanatorAnswerLog = {
questionId: string
questionText: string
questionCondition?: GekanatorQuestionCondition
questionMode?: 'normal' | 'winning_run'
answer: GekanatorAnswerValue
originalAnswer: GekanatorAnswerValue }
@@ -29,6 +30,8 @@ export type GekanatorQuestionSource =
| 'ai_generated'
| 'admin_curated'
export type GekanatorPerformanceMode = 'lite' | 'normal'
export type GekanatorQuestionCondition =
| { type: 'tag'; key: string }
| { type: 'source'; host: string }
@@ -38,6 +41,7 @@ export type GekanatorQuestionCondition =
| { type: 'title-length-at-least'; length: number }
| { type: 'title-length-greater-than'; length: number }
| { type: 'title-has-ascii' }
| { type: 'title-contains'; text: string }
| {
type: 'post-similarity'
postId: number
@@ -76,6 +80,13 @@ export type GekanatorQuestion = {
exampleAnswers?: Record<`${ number }`, GekanatorAnswerValue>
test: (post: Post) => boolean }
export type BuildGekanatorQuestionsOptions = {
includeTitleContains?: boolean
tagQuestionCap?: number
titleContainsCap?: number
totalQuestionCap?: number
}
export const normalizeTitleLengthCondition = (
condition: GekanatorQuestionCondition,
@@ -127,6 +138,8 @@ export const questionIdForCondition = (
return `title:length-at-least:${ titleLengthMinimumForCondition (condition) }`
case 'title-has-ascii':
return 'title:ascii'
case 'title-contains':
return `title:contains:${ condition.text }`
}
}
@@ -292,6 +305,8 @@ const questionMatches = (
return (post.title?.length ?? 0) > question.condition.length
case 'title-has-ascii':
return /[A-Za-z0-9]/.test (post.title ?? '')
case 'title-contains':
return (post.title ?? '').includes (question.condition.text)
case 'post-similarity':
return false
}
@@ -319,6 +334,7 @@ export const expectedAnswerForQuestion = (
case 'title-length-at-least':
case 'title-length-greater-than':
case 'title-has-ascii':
case 'title-contains':
return questionMatches (post, question) ? 'yes' : 'no'
case 'post-similarity':
return null
@@ -382,7 +398,16 @@ export const fetchGekanatorExtraQuestions = async (
}
export const buildGekanatorQuestions = (posts: Post[]): GekanatorQuestion[] => {
export const buildGekanatorQuestions = (
posts: Post[],
options: BuildGekanatorQuestionsOptions = { },
): GekanatorQuestion[] => {
const {
includeTitleContains = true,
tagQuestionCap = 192,
titleContainsCap = 24,
totalQuestionCap = Number.POSITIVE_INFINITY,
} = options
const tagCounts = countBy (posts.flatMap (post =>
post.tags
.filter (tag =>
@@ -404,17 +429,31 @@ export const buildGekanatorQuestions = (posts: Post[]): GekanatorQuestion[] => {
.map (originalMonthDayOf)
.filter ((monthDay): monthDay is string => monthDay !== null))
const titleLengthMedian = median (posts.map (post => post.title?.length ?? 0))
const titleWordCounts =
includeTitleContains
? countBy (
posts.flatMap (post =>
Array.from (
new Set (
(post.title ?? '')
.match (
/[\p{Script=Han}\p{Script=Hiragana}\p{Script=Katakana}A-Za-z0-9]{2,}/gu)
?? []))))
: new Map<string, number> ()
const usefulEntries = <T extends string | number> (counts: Map<T, number>) =>
const usefulEntries = <T extends string | number> (
counts: Map<T, number>,
cap: number,
) =>
[...counts.entries ()]
.filter (([, count]) => count > 0 && count < posts.length)
.sort ((a, b) => Math.abs (posts.length / 2 - a[1])
- Math.abs (posts.length / 2 - b[1]))
.slice (0, 80)
.slice (0, cap)
const tagQuestions = usefulEntries (tagCounts)
const tagQuestions = usefulEntries (tagCounts, Math.max (tagQuestionCap, 80))
.filter (([, count]) => count >= 2 && count <= Math.max (2, posts.length * .7))
.slice (0, 80)
.slice (0, tagQuestionCap)
.map (([key]) => {
const { category, name } = tagFromQuestionKey (String (key))
const label = category === 'nico' ? nicoTagLabel (name) : name
@@ -429,7 +468,7 @@ export const buildGekanatorQuestions = (posts: Post[]): GekanatorQuestion[] => {
test: (post: Post) => questionableTag (post, String (key)) }
})
const sourceQuestions = usefulEntries (hosts)
const sourceQuestions = usefulEntries (hosts, 20)
.filter (([, count]) => count >= 2 && count <= Math.max (2, posts.length * .7))
.slice (0, 20)
.map (([host]) => ({
@@ -441,7 +480,7 @@ export const buildGekanatorQuestions = (posts: Post[]): GekanatorQuestion[] => {
priorityWeight: 1,
test: (post: Post) => hostOf (post) === host }))
const originalYearQuestions = usefulEntries (originalYears)
const originalYearQuestions = usefulEntries (originalYears, 20)
.filter (([, count]) => count >= 2 && count <= Math.max (2, posts.length * .7))
.slice (0, 20)
.map (([year]) => ({
@@ -453,7 +492,7 @@ export const buildGekanatorQuestions = (posts: Post[]): GekanatorQuestion[] => {
priorityWeight: 1,
test: (post: Post) => originalYearOf (post) === year }))
const originalMonthQuestions = usefulEntries (originalMonths)
const originalMonthQuestions = usefulEntries (originalMonths, 20)
.filter (([, count]) => count >= 2 && count <= Math.max (2, posts.length * .7))
.slice (0, 20)
.map (([month]) => ({
@@ -465,7 +504,7 @@ export const buildGekanatorQuestions = (posts: Post[]): GekanatorQuestion[] => {
priorityWeight: 1,
test: (post: Post) => originalMonthOf (post) === month }))
const originalMonthDayQuestions = usefulEntries (originalMonthDays)
const originalMonthDayQuestions = usefulEntries (originalMonthDays, 20)
.filter (([, count]) => count >= 2 && count <= Math.max (2, posts.length * .7))
.slice (0, 20)
.map (([monthDay]) => {
@@ -505,6 +544,23 @@ export const buildGekanatorQuestions = (posts: Post[]): GekanatorQuestion[] => {
const no = posts.length - yes
return yes >= 2 && no >= 2 && yes <= posts.length * .7 && no <= posts.length * .7
})
const titleContainsQuestions =
includeTitleContains
? usefulEntries (titleWordCounts, titleContainsCap)
.filter (([word, count]) =>
String (word).length <= 24
&& count >= 2
&& count <= Math.max (2, posts.length * .7))
.slice (0, titleContainsCap)
.map (([word]) => ({
id: `title:contains:${ word }`,
text: `題名に「${ word }」が含まれる?`,
kind: 'title' as const,
condition: { type: 'title-contains' as const, text: String (word) },
source: 'default' as const,
priorityWeight: .96,
test: (post: Post) => (post.title ?? '').includes (String (word)) }))
: []
return [
...sourceQuestions,
@@ -512,7 +568,8 @@ export const buildGekanatorQuestions = (posts: Post[]): GekanatorQuestion[] => {
...originalMonthQuestions,
...originalMonthDayQuestions,
...titleQuestions,
...tagQuestions]
...titleContainsQuestions,
...tagQuestions].slice (0, totalQuestionCap)
}
+12 -2
ファイルの表示
@@ -100,7 +100,7 @@ export const allConcreteAnswerOptionsExhausted = (
}
const nextRecoveryBatchSize = (recoveryStepCount: number): number =>
const nextRecoveryTargetSize = (recoveryStepCount: number): number =>
6 * (2 ** recoveryStepCount)
@@ -125,6 +125,16 @@ export const recoverCandidatePosts = ({
recoveryStepCount: number
} | null => {
const recovered = new Map (recoveredCandidatePosts)
const targetSize = nextRecoveryTargetSize (recoveryStepCount)
const countedPostIds = new Set ([
...eligiblePostIds,
...recovered.keys ()])
const addCount = targetSize - countedPostIds.size
if (addCount <= 0)
return {
recoveredCandidatePosts: recovered,
recoveryStepCount: recoveryStepCount + 1 }
const candidates = posts
.filter (post =>
!(rejectedPostIds.has (post.id))
@@ -133,7 +143,7 @@ export const recoverCandidatePosts = ({
.sort ((a, b) =>
(scores.get (b.id) ?? Number.NEGATIVE_INFINITY)
- (scores.get (a.id) ?? Number.NEGATIVE_INFINITY))
.slice (0, nextRecoveryBatchSize (recoveryStepCount))
.slice (0, addCount)
if (candidates.length === 0)
return null