import { describe, expect, it } from 'vitest'

import {
  candidatePostsFor,
  hardFilteredPostsForAnswer,
  recoverCandidatePosts,
} from '@/lib/gekanatorCandidateRecovery'

import type {
  GekanatorAnswerLog,
  GekanatorAnswerValue,
  GekanatorQuestion,
} from '@/lib/gekanator'
import type { Post } from '@/types'

/** Timestamp used for both `createdAt` and `updatedAt` on every fixture post. */
const FIXTURE_TIMESTAMP = '2026-06-10T00:00:00.000Z'

/** Per-post canned answers, keyed by the stringified post id. */
type ExampleAnswers = Record<`${ number }`, GekanatorAnswerValue>

/**
 * Builds a minimal `Post` fixture.
 *
 * The URL and title are derived from `id`; every other field is a fixed
 * placeholder so tests only need to override what they care about.
 */
const post = (id: number): Post => ({
  id,
  versionNo: 1,
  url: `https://example.com/posts/${ id }`,
  title: `post ${ id }`,
  thumbnail: null,
  thumbnailBase: null,
  tags: [],
  viewed: false,
  related: [],
  originalCreatedFrom: null,
  originalCreatedBefore: null,
  createdAt: FIXTURE_TIMESTAMP,
  updatedAt: FIXTURE_TIMESTAMP,
  uploadedUser: null,
})

/**
 * Builds a `post_similarity` question fixture.
 *
 * Its `test` predicate passes only for candidates whose id maps to `'yes'`
 * in `answers`; the same record is exposed as `exampleAnswers`.
 */
const postSimilarityQuestion = (
  id: string,
  answers: ExampleAnswers,
): GekanatorQuestion => ({
  id,
  text: `${ id }?`,
  kind: 'post_similarity',
  condition: {
    type: 'post-similarity',
    postId: 9999,
    answer: 'yes',
    threshold: 0.65,
  },
  source: 'user_suggested',
  priorityWeight: 1,
  exampleAnswers: answers,
  test: candidate => {
    const key = String (candidate.id) as `${ number }`
    return answers[key] === 'yes'
  },
})

/**
 * Builds a `source` question fixture whose `test` predicate passes when the
 * hostname of the candidate's URL is exactly `host`.
 */
const sourceQuestion = (host: string): GekanatorQuestion => ({
  id: `source:${ host }`,
  text: `${ host }?`,
  kind: 'source',
  condition: { type: 'source', host },
  source: 'default',
  priorityWeight: 1,
  test: candidate => {
    const { hostname } = new URL (candidate.url)
    return hostname === host
  },
})

/**
 * Builds an answer-log entry for `question` in which `answer` and
 * `originalAnswer` are both `value`.
 */
const answer = (
  question: GekanatorQuestion,
  value: GekanatorAnswerValue,
): GekanatorAnswerLog => ({
  questionId: question.id,
  questionText: question.text,
  questionCondition: question.condition,
  answer: value,
  originalAnswer: value,
})

/** Projects a list of posts down to their ids, preserving order. */
const idsOf = (items: readonly Pick<Post, 'id'>[]) =>
  items.map (item => item.id)

/** Ten fixture posts with ids 1 through 10, in ascending order. */
const tenPosts = (): Post[] =>
  Array.from ({ length: 10 }, (_value, index) => post (index + 1))

describe('candidatePostsFor', () => {
  it('does not hard-filter semantic post_similarity answers', () => {
    const posts = [1, 2, 3].map (id => post (id))
    const oldQuestion = postSimilarityQuestion ('old', {
      1: 'no',
      2: 'yes',
      3: 'yes',
    })
    const laterQuestion = postSimilarityQuestion ('later', {
      1: 'no',
      2: 'no',
      3: 'yes',
    })

    const result = candidatePostsFor ({
      posts,
      questions: [oldQuestion, laterQuestion],
      answers: [answer (oldQuestion, 'yes'), answer (laterQuestion, 'yes')],
      softenedQuestionIds: new Set (),
      rejectedPostIds: new Set (),
      recoveredCandidatePosts: new Map ([
        [1, 1],
        [3, 1],
      ]),
    })

    // Every post survives even though posts 1 and 2 fail at least one predicate.
    expect(idsOf (result)).toEqual ([1, 2, 3])
  })

  it('lets recovered candidates ignore old fact answers but not later fact answers', () => {
    // Post 1 lives on a different host; posts 2 and 3 are on example.com.
    const posts = [
      { ...post (1), url: 'https://other.example/posts/1' },
      post (2),
      { ...post (3), url: 'https://example.com/posts/3' },
    ]
    const oldQuestion = sourceQuestion ('old.example.com')
    const laterQuestion = sourceQuestion ('example.com')

    const result = candidatePostsFor ({
      posts,
      questions: [oldQuestion, laterQuestion],
      answers: [answer (oldQuestion, 'yes'), answer (laterQuestion, 'yes')],
      softenedQuestionIds: new Set (),
      rejectedPostIds: new Set (),
      recoveredCandidatePosts: new Map ([
        [1, 1],
        [3, 1],
      ]),
    })

    // Only post 3 is both marked as recovered and on the later question's host.
    expect(idsOf (result)).toEqual ([3])
  })

  it('does not let recovered candidates bypass explicit rejected posts', () => {
    const posts = [post (1), post (2)]
    const question = postSimilarityQuestion ('question', {
      1: 'yes',
      2: 'yes',
    })

    // Post 1 appears in both the rejected set and the recovered map.
    const result = candidatePostsFor ({
      posts,
      questions: [question],
      answers: [answer (question, 'yes')],
      softenedQuestionIds: new Set (),
      rejectedPostIds: new Set ([1]),
      recoveredCandidatePosts: new Map ([[1, 1]]),
    })

    expect(idsOf (result)).toEqual ([2])
  })
})

describe('hardFilteredPostsForAnswer', () => {
  it('keeps the original pool for semantic post_similarity answers', () => {
    const posts = [post (1), post (2)]
    const question = postSimilarityQuestion ('question', {
      1: 'yes',
      2: 'yes',
    })

    const filtered = hardFilteredPostsForAnswer ({
      posts,
      question,
      answer: 'no',
    })

    expect(filtered).toEqual (posts)
  })

  it('hard-filters fact answers only for yes and no', () => {
    const posts = [
      { ...post (1), url: 'https://example.com/posts/1' },
      { ...post (2), url: 'https://other.example/posts/2' },
    ]
    const question = sourceQuestion ('example.com')

    // 'yes' keeps the matching host, 'no' keeps the non-matching one.
    const keptForYes = hardFilteredPostsForAnswer ({
      posts,
      question,
      answer: 'yes',
    })
    expect(idsOf (keptForYes)).toEqual ([1])

    const keptForNo = hardFilteredPostsForAnswer ({
      posts,
      question,
      answer: 'no',
    })
    expect(idsOf (keptForNo)).toEqual ([2])

    // The remaining answer values are expected to leave the pool untouched.
    for (const softAnswer of ['partial', 'probably_no', 'unknown'] as const) {
      expect(hardFilteredPostsForAnswer ({
        posts,
        question,
        answer: softAnswer,
      })).toEqual (posts)
    }
  })
})

describe('recoverCandidatePosts', () => {
  it('recovers high-score non-rejected, non-eligible candidates in staged batches', () => {
    const posts = tenPosts ()
    // Each post's score equals its id, so higher ids rank first.
    const scores = new Map (posts.map (entry => [entry.id, entry.id]))

    const outcome = recoverCandidatePosts ({
      posts,
      scores,
      rejectedPostIds: new Set ([10]),
      recoveredCandidatePosts: new Map ([[8, 1]]),
      eligiblePostIds: new Set ([9]),
      answerCountAtRecovery: 2,
      recoveryStepCount: 0,
    })

    expect(outcome?.recoveryStepCount).toBe (1)
    // Post 8 was already recovered; 10 is rejected and 9 is already eligible.
    expect(Array.from (outcome?.recoveredCandidatePosts.keys () ?? []))
      .toEqual ([8, 7, 6, 5, 4])
    // Newly recovered posts are expected to carry `answerCountAtRecovery`.
    expect(outcome?.recoveredCandidatePosts.get (7)).toBe (2)
  })

  it('does not add posts when recovered and eligible candidates already hit the target', () => {
    const posts = tenPosts ()
    const scores = new Map (posts.map (entry => [entry.id, entry.id]))

    const outcome = recoverCandidatePosts ({
      posts,
      scores,
      rejectedPostIds: new Set (),
      recoveredCandidatePosts: new Map ([
        [1, 1],
        [2, 1],
        [3, 1],
      ]),
      eligiblePostIds: new Set ([4, 5, 6]),
      answerCountAtRecovery: 2,
      recoveryStepCount: 0,
    })

    // The step counter still advances, but the recovered set is unchanged.
    expect(outcome?.recoveryStepCount).toBe (1)
    expect(Array.from (outcome?.recoveredCandidatePosts.keys () ?? []))
      .toEqual ([1, 2, 3])
  })
})