Merge remote-tracking branch 'origin/main' into '#106'
このコミットが含まれているのは:
@@ -1,28 +1,6 @@
|
||||
# Rake namespace holding the post-similarity task.
# NOTE(review): this text looks like a flattened diff (hunk header "-1,28 +1,6"), so the
# inline cosine implementation below is presumably the removed side and the single
# Similarity::Calc.call line near the end its replacement -- confirm against the real file.
namespace :post_similarity do
|
||||
# The description string is user-facing rake output (roughly: "create related-posts table").
desc '関聯投稿テーブル作成'
|
||||
task calc: :environment do
|
||||
# Dot product of two hashes: sums a[k] * b[k] over the keys present in both.
dot = -> a, b { (a.keys & b.keys).sum { |k| a[k] * b[k] } }
|
||||
# Euclidean norm of a hash's values.
norm = -> v { Math.sqrt(v.values.sum { |e| e * e }) }
|
||||
# Cosine similarity of two hashes; yields 0.0 when either norm is zero
# so that the division below is never performed with a zero divisor.
cos = -> a, b do
|
||||
na = norm.(a)
|
||||
nb = norm.(b)
|
||||
if na.zero? || nb.zero?
|
||||
0.0
|
||||
else
|
||||
dot.(a, b) / na / nb
|
||||
end
|
||||
end
|
||||
|
||||
# Load every post into memory with its tags preloaded.
posts = Post.includes(:tags).to_a
|
||||
posts.each_with_index do |post, i|
|
||||
# Hash mapping each of the post's tags to 1 (a presence vector keyed by tag).
existence_of_tags = post.tags.index_with(1)
|
||||
# Visit only indices after i, so each unordered pair of posts is handled once.
((i + 1)...posts.size).each do |j|
|
||||
target_post = posts[j]
|
||||
existence_of_target_tags = target_post.tags.index_with(1)
|
||||
# Find or build the PostSimilarity row for (post, target_post), set its cosine
# value, and persist it; save! raises if the record cannot be saved.
PostSimilarity.find_or_initialize_by(post:, target_post:).tap { |ps|
|
||||
ps.cos = cos.(existence_of_tags, existence_of_target_tags)
|
||||
}.save!
|
||||
end
|
||||
end
|
||||
# Delegates to Similarity::Calc, which is defined outside this view.
# NOTE(review): presumably computes the same similarity for Post via its :tags -- confirm.
Similarity::Calc.call(Post, :tags)
|
||||
end
|
||||
end
|
||||
|
||||
@@ -0,0 +1,6 @@
|
||||
# Rake namespace holding the tag-similarity task.
namespace :tag_similarity do
|
||||
# The description string is user-facing rake output (roughly: "create related-tags table").
desc '関聯タグ・テーブル作成'
|
||||
task calc: :environment do
|
||||
# Delegates all work to Similarity::Calc, which is defined outside this view.
# NOTE(review): presumably computes similarities between Tag records based on
# their :posts association -- confirm against Similarity::Calc.
Similarity::Calc.call(Tag, :posts)
|
||||
end
|
||||
end
|
||||
@@ -0,0 +1,74 @@
|
||||
# Rake namespace holding the wiki migration task.
namespace :wiki do
|
||||
# The description string is user-facing rake output (roughly: "Wiki migration").
desc 'Wiki 移行'
|
||||
# Copies the history of Gollum-backed wiki pages into WikiRevision, WikiLine and
# WikiRevisionLine records, one WikiRevision per Gollum version that has raw data.
task migrate: :environment do
|
||||
require 'digest'
|
||||
require 'gollum-lib'
|
||||
|
||||
# Opens the Gollum repository located at <Rails.root>/wiki.
wiki = Gollum::Wiki.new(Rails.root.join('wiki').to_s)
|
||||
|
||||
# Only pages that have no wiki_revisions yet are processed.
WikiPage.where.missing(:wiki_revisions).find_each do |wiki_page|
|
||||
# The Gollum page is looked up by the file name "<wiki_page.id>.md".
page = wiki.page("#{ wiki_page.id }.md")
|
||||
next unless page
|
||||
|
||||
versions = page.versions
|
||||
next if versions.blank?
|
||||
|
||||
# Id of the revision created for the previously processed version; nil for the first one.
base_revision_id = nil
|
||||
# NOTE(review): presumably page.versions is newest-first, so reverse_each walks
# oldest-first -- confirm against gollum-lib.
versions.reverse_each do |version|
|
||||
# Fetch the page as of this version; versions without raw data are skipped
# (and leave base_revision_id untouched).
pg = wiki.page("#{ wiki_page.id }.md", version.id)
|
||||
raw = pg&.raw_data
|
||||
next unless raw
|
||||
|
||||
# Relabel the bytes as UTF-8 (no transcoding) and split into lines on "\n".
lines = raw.force_encoding('UTF-8').split("\n")
|
||||
|
||||
# One SHA-256 per line, plus a revision-level hash over the comma-joined line hashes.
line_shas = lines.map { |l| Digest::SHA256.hexdigest(l) }
|
||||
tree_sha = Digest::SHA256.hexdigest(line_shas.join(','))
|
||||
|
||||
# The version's authored date is used for every timestamp written below.
at = version.authored_date
|
||||
|
||||
# Map of sha256 => id for the WikiLine rows that already exist.
line_id_by_sha = WikiLine.where(sha256: line_shas).pluck(:sha256, :id).to_h
|
||||
|
||||
# Collect rows for line hashes that have no WikiLine yet.
# NOTE(review): a sha that repeats within this revision (e.g. several blank lines)
# is appended once per occurrence; this relies on upsert_all and the table's
# unique index to collapse the duplicates -- confirm for the database in use.
missing_rows = []
|
||||
line_shas.each_with_index do |sha, i|
|
||||
next if line_id_by_sha.key?(sha)
|
||||
|
||||
missing_rows << { sha256: sha,
|
||||
body: lines[i],
|
||||
created_at: at,
|
||||
updated_at: at }
|
||||
end
|
||||
|
||||
# Insert the missing lines, then re-read the sha256 => id map so it includes them.
if missing_rows.any?
|
||||
WikiLine.upsert_all(missing_rows)
|
||||
line_id_by_sha = WikiLine.where(sha256: line_shas).pluck(:sha256, :id).to_h
|
||||
end
|
||||
# WikiLine ids in document order; fetch raises KeyError if a sha is still unknown.
line_ids = line_shas.map { |sha| line_id_by_sha.fetch(sha) }
|
||||
|
||||
# Declared outside the transaction block so it stays readable after the block.
rev = nil
|
||||
ActiveRecord::Base.transaction do
|
||||
wiki_page.lock!
|
||||
|
||||
# created_user_id is the author name parsed as an Integer, falling back to 2 on
# any StandardError from the parse or the author lookup.
# NOTE(review): the meaning of user id 2 is not visible here -- confirm.
rev = WikiRevision.create!(
|
||||
wiki_page:,
|
||||
base_revision_id:,
|
||||
created_user_id: (Integer(version.author.name) rescue 2),
|
||||
kind: :content,
|
||||
redirect_page_id: nil,
|
||||
message: nil,
|
||||
lines_count: lines.length,
|
||||
tree_sha256: tree_sha,
|
||||
created_at: at,
|
||||
updated_at: at)
|
||||
|
||||
# One join row per line, recording its zero-based position within the revision.
rows = line_ids.each_with_index.map do |line_id, pos|
|
||||
{ wiki_revision_id: rev.id,
|
||||
wiki_line_id: line_id,
|
||||
position: pos }
|
||||
end
|
||||
# NOTE(review): rows is empty when the raw content is empty; confirm how
# insert_all! treats an empty list on the Rails version in use.
WikiRevisionLine.insert_all!(rows)
|
||||
end
|
||||
# The next version's revision is chained onto the one just created.
base_revision_id = rev.id
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
@@ -3,9 +3,10 @@ namespace :nico do
|
||||
task sync: :environment do
|
||||
require 'json'
|
||||
require 'nokogiri'
|
||||
require 'open3'
|
||||
require 'open-uri'
|
||||
require 'open3'
|
||||
require 'set'
|
||||
require 'time'
|
||||
|
||||
fetch_thumbnail = -> url do
|
||||
html = URI.open(url, read_timeout: 60, 'User-Agent' => 'Mozilla/5.0').read
|
||||
@@ -14,9 +15,9 @@ namespace :nico do
|
||||
doc.at('meta[name="thumbnail"]')&.[]('content').presence
|
||||
end
|
||||
|
||||
def sync_post_tags! post, desired_tag_ids
|
||||
def sync_post_tags! post, desired_tag_ids, current_ids: nil
|
||||
current_ids ||= PostTag.kept.where(post_id: post.id).pluck(:tag_id).to_set
|
||||
desired_ids = desired_tag_ids.compact.to_set
|
||||
current_ids = post.tags.pluck(:id).to_set
|
||||
|
||||
to_add = desired_ids - current_ids
|
||||
to_remove = current_ids - desired_ids
|
||||
@@ -45,14 +46,43 @@ namespace :nico do
|
||||
|
||||
data = JSON.parse(stdout)
|
||||
data.each do |datum|
|
||||
post = Post.where('url LIKE ?', '%nicovideo.jp%').find { |post|
|
||||
post.url =~ %r{#{ Regexp.escape(datum['code']) }(?!\d)}
|
||||
}
|
||||
unless post
|
||||
title = datum['title']
|
||||
url = "https://www.nicovideo.jp/watch/#{ datum['code'] }"
|
||||
thumbnail_base = fetch_thumbnail.(url) || '' rescue ''
|
||||
post = Post.new(title:, url:, thumbnail_base:, uploaded_user: nil)
|
||||
code = datum['code']
|
||||
|
||||
post =
|
||||
Post
|
||||
.where('url REGEXP ?', "nicovideo\\.jp/watch/#{ Regexp.escape(code) }([^0-9]|$)")
|
||||
.first
|
||||
|
||||
title = datum['title']
|
||||
original_created_at = datum['uploaded_at'] &&
|
||||
Time.strptime(datum['uploaded_at'], '%Y-%m-%d %H:%M:%S')
|
||||
original_created_from = original_created_at&.change(sec: 0)
|
||||
original_created_before = original_created_from&.+(1.minute)
|
||||
|
||||
if post
|
||||
attrs = { title:, original_created_from:, original_created_before: }
|
||||
|
||||
unless post.thumbnail.attached?
|
||||
thumbnail_base = fetch_thumbnail.(post.url) rescue nil
|
||||
if thumbnail_base.present?
|
||||
post.thumbnail.attach(
|
||||
io: URI.open(thumbnail_base),
|
||||
filename: File.basename(URI.parse(thumbnail_base).path),
|
||||
content_type: 'image/jpeg')
|
||||
attrs[:thumbnail_base] = thumbnail_base
|
||||
end
|
||||
end
|
||||
|
||||
post.assign_attributes(attrs)
|
||||
if post.changed?
|
||||
post.save!
|
||||
post.resized_thumbnail! if post.thumbnail.attached?
|
||||
end
|
||||
else
|
||||
url = "https://www.nicovideo.jp/watch/#{ code }"
|
||||
thumbnail_base = fetch_thumbnail.(url) rescue nil
|
||||
post = Post.new(title:, url:, thumbnail_base:, uploaded_user: nil,
|
||||
original_created_from:, original_created_before:)
|
||||
if thumbnail_base.present?
|
||||
post.thumbnail.attach(
|
||||
io: URI.open(thumbnail_base),
|
||||
@@ -61,24 +91,22 @@ namespace :nico do
|
||||
end
|
||||
post.save!
|
||||
post.resized_thumbnail!
|
||||
sync_post_tags!(post, [Tag.tagme.id])
|
||||
sync_post_tags!(post, [Tag.tagme.id, Tag.bot.id, Tag.niconico.id, Tag.video.id])
|
||||
end
|
||||
|
||||
kept_tags = post.tags.reload
|
||||
kept_non_nico_ids = kept_tags.where.not(category: 'nico').pluck(:id).to_set
|
||||
kept_ids = PostTag.kept.where(post_id: post.id).pluck(:tag_id).to_set
|
||||
kept_non_nico_ids = post.tags.where.not(category: 'nico').pluck(:id).to_set
|
||||
|
||||
desired_nico_ids = []
|
||||
desired_non_nico_ids = []
|
||||
datum['tags'].each do |raw|
|
||||
name = "nico:#{ raw }"
|
||||
tag = Tag.find_or_initialize_by(name:) do |t|
|
||||
t.category = 'nico'
|
||||
end
|
||||
tag.save! if tag.new_record?
|
||||
tag = Tag.find_or_create_by_tag_name!(name, category: 'nico')
|
||||
desired_nico_ids << tag.id
|
||||
unless tag.in?(kept_tags)
|
||||
desired_non_nico_ids.concat(tag.linked_tags.pluck(:id))
|
||||
desired_nico_ids.concat(tag.linked_tags.pluck(:id))
|
||||
unless tag.id.in?(kept_ids)
|
||||
linked_ids = tag.linked_tags.pluck(:id)
|
||||
desired_non_nico_ids.concat(linked_ids)
|
||||
desired_nico_ids.concat(linked_ids)
|
||||
end
|
||||
end
|
||||
desired_nico_ids.uniq!
|
||||
@@ -94,7 +122,7 @@ namespace :nico do
|
||||
end
|
||||
desired_all_ids.uniq!
|
||||
|
||||
sync_post_tags!(post, desired_all_ids)
|
||||
sync_post_tags!(post, desired_all_ids, current_ids: kept_ids)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
新しい課題から参照
ユーザをブロックする