f6de272f55
#228 #228 #228 Co-authored-by: miteruzo <miteruzo@naver.com> Reviewed-on: #232
107 lines
2.4 KiB
Ruby
107 lines
2.4 KiB
Ruby
module Similarity
  # Rebuilds the "<Model>Similarity" table for a model.
  #
  # Every pair of records is scored with the cosine similarity of their
  # associated-id sets, |A ∩ B| / sqrt(|A| * |B|), and for each record the
  # best-scoring counterparts are stored (highest score first).
  class Calc
    # Default maximum number of similarity rows stored per record.
    DEFAULT_LIMIT = 20

    # Number of rows sent to the database per insert statement.
    BATCH_SIZE = 1_000

    # Recomputes and replaces all similarity rows for +model+.
    #
    # @param model [Class] record class; "#{model.name}Similarity" must resolve
    #   to the class that stores the results.
    # @param tgt [Symbol, String] name of the association on +model+ whose
    #   member ids are compared (each member must respond to +id+).
    # @param limit [Integer] maximum number of counterparts kept per record.
    # @return [nil]
    # @raise [ArgumentError] if +limit+ is not a positive Integer.
    def self.call(model, tgt, limit: DEFAULT_LIMIT)
      if !limit.is_a?(Integer) || limit < 1
        raise ArgumentError, "limit must be a positive Integer (got #{ limit.inspect })"
      end

      similarity_model = "#{ model.name }Similarity".constantize
      key = "#{ model.name.underscore }_id".to_sym
      target_key = "target_#{ model.name.underscore }_id".to_sym

      # Load and rank BEFORE touching the table: if anything below raises, the
      # previously stored similarities stay intact.
      posts = model.includes(tgt).select(:id).to_a
      ids = posts.map(&:id)
      # Sorted id lists are what the merge-based intersection below relies on.
      tag_ids = posts.to_h { |post| [post.id, post.public_send(tgt).map(&:id).sort] }

      top = Hash.new { |hash, id| hash[id] = [] }

      # Index-based nested loops (rather than Array#combination) keep the pair
      # order deterministic, which decides how ties are ranked.
      ids.each_with_index do |post_id, i|
        a = tag_ids[post_id]

        ((i + 1)...ids.size).each do |j|
          target_id = ids[j]
          b = tag_ids[target_id]

          norm = Math.sqrt(a.size * b.size)
          # A record without any associated ids has similarity 0 with everything.
          cos = norm.zero? ? 0.0 : intersection_size(a, b).fdiv(norm)

          push_topk(top[post_id], cos, target_id, limit)
          push_topk(top[target_id], cos, post_id, limit)
        end
      end

      # Rows are generated lazily so only one batch is materialised at a time.
      rows = top.lazy.flat_map { |(post_id, list)|
        list.map { |cos, target_post_id| { key => post_id, target_key => target_post_id, cos: } }
      }

      # Replace the old rows atomically: readers never observe an empty or
      # half-filled table, and a failed insert rolls the delete back.
      similarity_model.transaction do
        similarity_model.delete_all
        rows.each_slice(BATCH_SIZE) { |batch| similarity_model.insert_all!(batch) }
      end

      nil
    end

    # Counts the elements common to two ascending-sorted arrays using a
    # two-pointer merge.
    def self.intersection_size(a, b)
      i = 0
      j = 0
      count = 0

      while i < a.size && j < b.size
        a_i = a[i]
        b_j = b[j]

        if a_i == b_j
          count += 1
          i += 1
          j += 1
        elsif a_i < b_j
          i += 1
        else
          j += 1
        end
      end

      count
    end

    # Inserts [cos, target_id] into +list+, which is kept sorted by descending
    # score and capped at +limit+ entries. On equal scores the earlier entry
    # stays ahead of the new one.
    #
    # No duplicate-target check is needed: the caller visits every unordered
    # pair exactly once, so a target can never be pushed twice into one list.
    def self.push_topk(list, cos, target_id, limit)
      return if list.size >= limit && cos <= list.last.first

      # First position whose score is strictly lower than the new one.
      insert_at = list.bsearch_index { |(score, _)| score < cos } || list.size
      list.insert(insert_at, [cos, target_id])
      list.pop if list.size > limit
    end

    private_class_method :intersection_size, :push_topk
  end
end
|