// btrc-hub/frontend/src/lib/gekanatorCandidateRecovery.test.ts

import { describe, expect, it } from 'vitest'

import {
    candidatePostsFor,
    hardFilteredPostsForAnswer,
    recoverCandidatePosts,
} from '@/lib/gekanatorCandidateRecovery'

import type {
    GekanatorAnswerLog,
    GekanatorAnswerValue,
    GekanatorQuestion,
} from '@/lib/gekanator'
import type { Post } from '@/types'


const post = (id: number): Post => ({
  id,
  versionNo: 1,
  url: `https://example.com/posts/${ id }`,
  title: `post ${ id }`,
  thumbnail: null,
  thumbnailBase: null,
  tags: [],
  viewed: false,
  related: [],
  originalCreatedFrom: null,
  originalCreatedBefore: null,
  createdAt: '2026-06-10T00:00:00.000Z',
  updatedAt: '2026-06-10T00:00:00.000Z',
  uploadedUser: null,
})


const postSimilarityQuestion = (
    id:       string,
    answers:  Record<`${ number }`, GekanatorAnswerValue>,
): GekanatorQuestion => ({
  id,
  text: `${ id }?`,
  kind: 'post_similarity',
  condition: {
      type: 'post-similarity',
      postId: 9999,
      answer: 'yes',
      threshold: 0.65 },
  source: 'user_suggested',
  priorityWeight: 1,
  exampleAnswers: answers,
  test: candidate => answers[String (candidate.id) as `${ number }`] === 'yes',
})


const sourceQuestion = (
    host:  string,
): GekanatorQuestion => ({
  id: `source:${ host }`,
  text: `${ host }?`,
  kind: 'source',
  condition: {
      type: 'source',
      host },
  source: 'default',
  priorityWeight: 1,
  test: candidate => new URL (candidate.url).hostname === host,
})


const answer = (
    question:  GekanatorQuestion,
    value:     GekanatorAnswerValue,
): GekanatorAnswerLog => ({
  questionId: question.id,
  questionText: question.text,
  questionCondition: question.condition,
  answer: value,
  originalAnswer: value,
})


describe('candidatePostsFor', () => {
  it('does not hard-filter semantic post_similarity answers', () => {
    const posts = [post (1), post (2), post (3)]
    const oldQuestion = postSimilarityQuestion ('old', {
	1: 'no',
	2: 'yes',
	3: 'yes',
    })
    const laterQuestion = postSimilarityQuestion ('later', {
	1: 'no',
	2: 'no',
	3: 'yes',
    })

    const candidates = candidatePostsFor ({
	posts,
	questions: [oldQuestion, laterQuestion],
	answers: [answer (oldQuestion, 'yes'), answer (laterQuestion, 'yes')],
	softenedQuestionIds: new Set (),
	rejectedPostIds: new Set (),
	recoveredCandidatePosts: new Map ([
	    [1, 1],
	    [3, 1],
	]) })

    expect(candidates.map (candidate => candidate.id)).toEqual ([1, 2, 3])
  })

  it('lets recovered candidates ignore old fact answers but not later fact answers', () => {
    const posts = [
	{ ...post (1), url: 'https://other.example/posts/1' },
	post (2),
	{ ...post (3), url: 'https://example.com/posts/3' },
    ]
    const oldQuestion = sourceQuestion ('old.example.com')
    const laterQuestion = sourceQuestion ('example.com')

    const candidates = candidatePostsFor ({
	posts,
	questions: [oldQuestion, laterQuestion],
	answers: [answer (oldQuestion, 'yes'), answer (laterQuestion, 'yes')],
	softenedQuestionIds: new Set (),
	rejectedPostIds: new Set (),
	recoveredCandidatePosts: new Map ([
	    [1, 1],
	    [3, 1],
	]) })

    expect(candidates.map (candidate => candidate.id)).toEqual ([3])
  })

  it('does not let recovered candidates bypass explicit rejected posts', () => {
    const posts = [post (1), post (2)]
    const question = postSimilarityQuestion ('question', {
	1: 'yes',
	2: 'yes',
    })

    const candidates = candidatePostsFor ({
	posts,
	questions: [question],
	answers: [answer (question, 'yes')],
	softenedQuestionIds: new Set (),
	rejectedPostIds: new Set ([1]),
	recoveredCandidatePosts: new Map ([[1, 1]]) })

    expect(candidates.map (candidate => candidate.id)).toEqual ([2])
  })
})


describe('hardFilteredPostsForAnswer', () => {
  it('keeps the original pool for semantic post_similarity answers', () => {
    const posts = [post (1), post (2)]
    const question = postSimilarityQuestion ('question', {
	1: 'yes',
	2: 'yes',
    })

    expect(hardFilteredPostsForAnswer ({
	posts,
	question,
	answer: 'no',
    })).toEqual (posts)
  })

  it('hard-filters fact answers only for yes and no', () => {
    const posts = [
	{ ...post (1), url: 'https://example.com/posts/1' },
	{ ...post (2), url: 'https://other.example/posts/2' },
    ]
    const question = sourceQuestion ('example.com')

    expect(hardFilteredPostsForAnswer ({
	posts,
	question,
	answer: 'yes',
    }).map (candidate => candidate.id)).toEqual ([1])
    expect(hardFilteredPostsForAnswer ({
	posts,
	question,
	answer: 'no',
    }).map (candidate => candidate.id)).toEqual ([2])
    expect(hardFilteredPostsForAnswer ({
	posts,
	question,
	answer: 'partial',
    })).toEqual (posts)
    expect(hardFilteredPostsForAnswer ({
	posts,
	question,
	answer: 'probably_no',
    })).toEqual (posts)
    expect(hardFilteredPostsForAnswer ({
	posts,
	question,
	answer: 'unknown',
    })).toEqual (posts)
  })
})


describe('recoverCandidatePosts', () => {
  it('recovers high-score non-rejected, non-eligible candidates in staged batches', () => {
    const posts = Array.from ({ length: 10 }, (_value, index) => post (index + 1))
    const scores = new Map (posts.map (candidate => [candidate.id, candidate.id]))

    const recovered = recoverCandidatePosts ({
	posts,
	scores,
	rejectedPostIds: new Set ([10]),
	recoveredCandidatePosts: new Map ([[8, 1]]),
	eligiblePostIds: new Set ([9]),
	answerCountAtRecovery: 2,
	recoveryStepCount: 0,
    })

    expect(recovered?.recoveryStepCount).toBe (1)
    expect([...(recovered?.recoveredCandidatePosts.keys () ?? [])])
      .toEqual ([8, 7, 6, 5, 4])
    expect(recovered?.recoveredCandidatePosts.get (7)).toBe (2)
  })

  it('does not add posts when recovered and eligible candidates already hit the target', () => {
    const posts = Array.from ({ length: 10 }, (_value, index) => post (index + 1))
    const scores = new Map (posts.map (candidate => [candidate.id, candidate.id]))

    const recovered = recoverCandidatePosts ({
	posts,
	scores,
	rejectedPostIds: new Set (),
	recoveredCandidatePosts: new Map ([
	    [1, 1],
	    [2, 1],
	    [3, 1],
	]),
	eligiblePostIds: new Set ([4, 5, 6]),
	answerCountAtRecovery: 2,
	recoveryStepCount: 0,
    })

    expect(recovered?.recoveryStepCount).toBe (1)
    expect([...(recovered?.recoveredCandidatePosts.keys () ?? [])])
      .toEqual ([1, 2, 3])
  })
})