ぼざクリタグ広場 https://hub.nizika.monster
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 
 

107 lines
2.4 KiB

  module Similarity
    # Rebuilds the "<Model>Similarity" table from the overlap between the sets
    # of associated record ids (e.g. tags) of every pair of +model+ records.
    class Calc
      # Default maximum number of neighbours stored per record.
      DEFAULT_LIMIT = 20

      # Number of rows handed to a single insert_all! call.
      BATCH_SIZE = 1_000

      # Recomputes every similarity row for +model+.
      #
      # For each pair of records the score is
      #   |A ∩ B| / sqrt(|A| * |B|)
      # where A and B are the id lists of the +tgt+ association (0.0 when
      # either list is empty). The best +limit+ neighbours of each record are
      # kept, including neighbours whose score is 0.0.
      #
      # All rows are computed before the table is touched, and the delete plus
      # the batched inserts run inside one transaction, so a failure while
      # loading, computing or inserting does not leave the table emptied or
      # half-filled (assuming +transaction+ rolls back on error, as
      # ActiveRecord's does).
      #
      # @param model [Class] class responding to +name+, +includes+ and +select+
      # @param tgt [Symbol] association name read from each record
      # @param limit [Integer] maximum neighbours stored per record
      # @return [nil]
      # @raise [ArgumentError] if +limit+ is not positive
      def self.call(model, tgt, limit: DEFAULT_LIMIT)
        raise ArgumentError, 'limit must be positive' unless limit.positive?

        similarity_model = "#{model.name}Similarity".constantize
        tag_ids = load_tag_ids(model, tgt)
        rows = build_rows(model, top_neighbours(tag_ids, limit))

        similarity_model.transaction do
          similarity_model.delete_all
          rows.each_slice(BATCH_SIZE) { |batch| similarity_model.insert_all!(batch) }
        end
        nil
      end

      # Maps each record id to the sorted ids of its +tgt+ association.
      # Sorting is required by the merge walk in intersection_size.
      def self.load_tag_ids(model, tgt)
        model.includes(tgt).select(:id).each_with_object({}) do |record, acc|
          acc[record.id] = record.public_send(tgt).map(&:id).sort
        end
      end

      # Counts the elements shared by two ascending-sorted arrays with a
      # two-pointer merge walk; it allocates nothing, and it is called once
      # per pair of records.
      def self.intersection_size(a, b)
        i = 0
        j = 0
        count = 0
        while i < a.size && j < b.size
          left = a[i]
          right = b[j]
          if left == right
            count += 1
            i += 1
            j += 1
          elsif left < right
            i += 1
          else
            j += 1
          end
        end
        count
      end

      # Inserts [cos, target_id] into +list+, which is kept sorted by
      # descending score and capped at +limit+ entries. On equal scores the
      # earlier entry stays ahead of the new one.
      def self.push_topk(list, cos, target_id, limit)
        return if list.size >= limit && cos <= list.last.first

        # The list is sorted descending, so "score < cos" flips from false to
        # true exactly once, which is what bsearch_index needs.
        insert_at = list.bsearch_index { |(score, _)| score < cos } || list.size
        list.insert(insert_at, [cos, target_id])
        list.pop if list.size > limit
      end

      # Scores every unordered pair of records once and returns
      # { record_id => [[cos, neighbour_id], ...] } with each list sorted by
      # descending score.
      def self.top_neighbours(tag_ids, limit)
        # Hash keys are unique and each index pair (i, j) with i < j is visited
        # exactly once, so a neighbour can never already be in a list and no
        # duplicate check is needed when pushing.
        ids = tag_ids.keys
        top = ids.to_h { |id| [id, []] }

        ids.each_with_index do |post_id, i|
          a = tag_ids[post_id]
          ((i + 1)...ids.size).each do |j|
            target_id = ids[j]
            b = tag_ids[target_id]
            norm = Math.sqrt(a.size * b.size)
            cos = norm.zero? ? 0.0 : intersection_size(a, b).fdiv(norm)
            push_topk(top[post_id], cos, target_id, limit)
            push_topk(top[target_id], cos, post_id, limit)
          end
        end
        top
      end

      # Flattens the per-record neighbour lists into insertable row hashes
      # keyed by :<model>_id, :target_<model>_id and :cos.
      def self.build_rows(model, top)
        base = model.name.underscore
        source_key = :"#{base}_id"
        target_key = :"target_#{base}_id"

        top.flat_map do |post_id, list|
          list.map do |cos, target_id|
            { source_key => post_id, target_key => target_id, cos: cos }
          end
        end
      end

      private_class_method :load_tag_ids, :intersection_size, :push_topk,
                           :top_neighbours, :build_rows
    end
  end