削除フラグが誤って付与されるバグ修正(#20) (#21)

#20

#20

#20

#020

Co-authored-by: miteruzo <miteruzo@naver.com>
Reviewed-on: #21
このコミットはプルリクエスト #21 でマージされました。
このコミットが含まれているのは:
2026-04-11 05:13:29 +09:00
コミット cb72b8dd99
5個のファイルの変更、240行の追加、133行の削除
+23 -4
ファイルの表示
@@ -19,13 +19,32 @@ class Model (eloquent.Model):
self,
*args: str,
) -> None:
q = self.query ()
for arg in args:
q = q.where (arg, getattr (self, arg))
row = q.first ()
row = self._find_upsert_row (*args)
if row is not None:
self.id = row.id
# pylint: disable = invalid-name
# pylint: disable = attribute-defined-outside-init
self._Model__exists = True
self.save ()
return
try:
self.save ()
except Exception:
row = self._find_upsert_row (*args)
if row is None:
raise
self.id = row.id
# pylint: disable = invalid-name
# pylint: disable = attribute-defined-outside-init
self._Model__exists = True
self.save ()
def _find_upsert_row (
        self,
        *args: str,
):
    """Look up the stored row whose key columns equal this instance's values.

    Every name in ``args`` is used as a column name: starting from
    ``self.query ()``, the builder is narrowed once per name with
    ``where (name, getattr (self, name))``, in the order given.  The value
    of ``first ()`` on the resulting builder is returned as-is (callers
    compare it against ``None`` to detect "no match").

    When ``args`` is empty no condition is added, so the result is simply
    the first row of the unfiltered query.
    """
    builder = self.query ()
    for column in args:
        builder = builder.where (column, getattr (self, column))
    return builder.first ()
+2 -5
ファイルの表示
@@ -8,12 +8,9 @@
from __future__ import annotations
import json
import os
import sys
from datetime import date, datetime
from typing import TypedDict, cast
from eloquent import DatabaseManager, Model
from datetime import datetime
from typing import TypedDict
from db.config import DB
from db.models import Video
+7 -9
ファイルの表示
@@ -8,12 +8,9 @@
from __future__ import annotations
import json
import os
import sys
from datetime import date, datetime
from typing import TypedDict, cast
from eloquent import DatabaseManager, Model
from typing import cast
from db.config import DB
from db.models import Video, VideoHistory
@@ -25,14 +22,15 @@ def main (
views_counts: list[int],
base_date: date,
) -> None:
if not base_date:
base_date = datetime.now ().date ()
kiriban_list: list[tuple[int, str, str]] = []
latest_fetched_at = cast (date, (VideoHistory
latest_fetched_at = cast (date | None,
(VideoHistory
.where ('fetched_at', '<=', base_date)
.max ('fetched_at')))
if latest_fetched_at is None:
print ('[]')
return
for views_count in views_counts:
targets = { vh.video.code for vh in (
@@ -63,5 +61,5 @@ def main (
if __name__ == '__main__':
main (map (int, sys.argv[2:]),
main (list (map (int, sys.argv[2:])),
datetime.strptime (sys.argv[1], '%Y-%m-%d').date ())
-13
ファイルの表示
@@ -8,12 +8,9 @@
from __future__ import annotations
import json
import os
from datetime import date, datetime
from typing import TypedDict
from eloquent import DatabaseManager, Model
from db.config import DB
from db.models import Video
@@ -40,16 +37,6 @@ def main (
print (json.dumps (videos, default = str))
class DbConfig (TypedDict):
    """Typed dictionary with six string entries.

    NOTE(review): presumably the connection settings handed to eloquent's
    ``DatabaseManager`` — confirm against the code that builds it.
    """
    driver: str
    host: str
    database: str
    user: str
    password: str
    # NOTE(review): looks like a table-name prefix — confirm.
    prefix: str
class VideoDict (TypedDict):
id: int
code: str
+176 -70
ファイルの表示
@@ -8,17 +8,16 @@
from __future__ import annotations
import json
import os
import logging
import random
import string
import time
import unicodedata
from datetime import datetime, timedelta
from datetime import date, datetime, timedelta
from typing import Any, TypedDict, cast
import jaconv
import requests
from eloquent import DatabaseManager, Model
from db.config import DB
from db.models import (Comment,
@@ -29,39 +28,55 @@ from db.models import (Comment,
VideoHistory,
VideoTag)
# Module-level logger named after this module.
logger = logging.getLogger (__name__)
# NOTE: this runs at import time.  It configures logging at INFO level with
# a "timestamp level message" line format.
logging.basicConfig (
        level = logging.INFO,
        format = '%(asctime)s %(levelname)s %(message)s')
def main (
) -> None:
now = datetime.now ()
today = now.date ()
api_data = search_nico_by_tags (['伊地知ニジカ',
search_result = search_nico_by_tags (['伊地知ニジカ',
'ぼざろクリーチャーシリーズ',
'ぼざろクリーチャーシリーズ外伝'])
comments_by_video_code = fetch_comments_by_video_code (search_result['videos'])
DB.begin_transaction ()
context: UpdateContext = { 'api_data': search_result['videos'],
'comments_by_video_code': comments_by_video_code,
'deletable': search_result['is_complete'] }
connection = DB.connection ()
connection.begin_transaction ()
try:
update_tables (api_data, now)
DB.commit ()
update_tables (context, now, today)
connection.commit ()
except Exception:
DB.rollback ()
connection.rollback ()
raise
def update_tables (
api_data: list[VideoResult],
context: UpdateContext,
now: datetime,
today: date,
) -> None:
alive_video_codes: list[str] = []
for datum in api_data:
tag_names: list[str] = datum['tags'].split ()
for datum in context['api_data']:
tag_names = datum['tags'].split ()
normalised_tag_names = {normalise (tag_name) for tag_name in tag_names}
user: User | None = None
if datum['userId']:
if datum['userId'] is not None:
user = User.where ('code', str (datum['userId'])).first ()
if user is None:
user = User ()
user.code = str (datum['userId'])
user.save ()
video = Video ()
video.code = datum['contentId']
video.user_id = user.id if user else None
@@ -71,60 +86,72 @@ def update_tables (
video.deleted_at = None
video.upsert ()
alive_video_codes.append (video.code)
video_history = VideoHistory ()
video_history.video_id = video.id
video_history.fetched_at = now
video_history.fetched_at = today
video_history.views_count = datum['viewCounter']
video_history.upsert ()
video_tags = [video_tag for video_tag in video.video_tags
if video_tag.untagged_at is None]
tag: Tag | None
video_tag: VideoTag | None
for video_tag in video_tags:
tag = video_tag.tag
if (tag is not None
and (normalise (tag.name) not in map (normalise, tag_names))):
video_tag.untagged_at = now
if tag is None:
continue
if normalise (tag.name) in normalised_tag_names:
continue
video_tag.untagged_at = today
video_tag.save ()
for tag_name in tag_names:
tag = Tag.where ('name', tag_name).first ()
if tag is None:
tag = Tag ()
tag.name = tag_name
tag.save ()
video_tag = (VideoTag.where ('video_id', video.id)
.where ('tag_id', tag.id)
.where_null ('untagged_at')
.first ())
if video_tag is None:
video_tag = VideoTag ()
video_tag.video_id = video.id
video_tag.tag_id = tag.id
video_tag.tagged_at = now
video_tag.tagged_at = getattr (video_tag, 'tagged_at', None) or today
video_tag.untagged_at = None
video_tag.save ()
for com in fetch_comments (video.code):
video_tag.upsert ()
for com in context['comments_by_video_code'].get (video.code, []):
user = User.where ('code', com['userId']).first ()
if user is None:
user = User ()
user.code = com['userId']
user.save ()
comment = Comment ()
comment.video_id = video.id
comment.comment_no = com['no']
comment.user_id = user.id
comment.content = com['body']
comment.posted_at = datetime.fromisoformat (com['postedAt'])
comment.nico_count = com['nicoruCount']
comment.vpos_ms = com['vposMs']
comment.nico_count = com.get ('nicoruCount', 0)
comment.vpos_ms = com.get ('vposMs', 0)
comment.upsert ()
# 削除動画
if not context['deletable']:
logger.warning ('skip soft-delete because the latest fetch was incomplete')
return
if not alive_video_codes:
logger.warning ('skip soft-delete because no alive videos were fetched')
return
videos = (Video.where_not_in ('code', alive_video_codes)
.where_null ('deleted_at')
.get ())
for video in videos:
if video.code not in alive_video_codes:
video.deleted_at = now
video.save ()
@@ -143,19 +170,38 @@ def fetch_video_data (
+ '_'
+ str (random.randrange (10 ** 12, 10 ** 13)))
url = (f"https://www.nicovideo.jp/api/watch/v3_guest/{ video_code }"
+ f"?actionTrackId={ action_track_id }")
url = (f'https://www.nicovideo.jp/api/watch/v3_guest/{ video_code }'
+ f'?actionTrackId={ action_track_id }')
return requests.post (url, headers = headers, timeout = 60).json ()
def fetch_comments_by_video_code (
        videos: list[VideoResult],
) -> dict[str, list[CommentResult]]:
    """Fetch the comments of every given video, keyed by video code.

    Args:
        videos: Video records; only ``contentId`` is read from each one.

    Returns:
        A dict with one entry per ``contentId``.  The value is whatever
        ``fetch_comments`` returned for that code, or an empty list when
        the fetch failed.

    A failed fetch is logged as a warning and never aborts the loop.
    Besides network and JSON decoding errors, this covers every shape
    error a malformed payload can trigger: missing keys or indexes
    (``LookupError``), wrong node types (``TypeError``) and ``.get`` being
    called on a JSON ``null`` node (``AttributeError``).
    """
    comments_by_video_code: dict[str, list[CommentResult]] = {}
    for video in videos:
        video_code = video['contentId']
        try:
            comments = fetch_comments (video_code)
        except (AttributeError,
                LookupError,
                TypeError,
                ValueError,
                requests.RequestException) as exc:
            logger.warning ('failed to fetch comments: %s (%s)', video_code, exc)
            comments = []
        comments_by_video_code[video_code] = comments
    return comments_by_video_code
def fetch_comments (
video_code: str,
) -> list[CommentResult]:
try:
nv_comment = fetch_video_data (video_code)['data']['comment']['nvComment']
except KeyError:
return []
video_data = fetch_video_data (video_code)
nv_comment = (video_data.get ('data', {})
.get ('comment', {})
.get ('nvComment'))
if nv_comment is None:
return []
@@ -169,26 +215,63 @@ def fetch_comments (
url = nv_comment['server'] + '/v1/threads'
res = (requests.post (url, json.dumps (params),
response = requests.post (url,
json = params,
headers = headers,
timeout = 60)
.json ())
response.raise_for_status ()
res = response.json ()
try:
return res['data']['threads'][1]['comments']
except (IndexError, KeyError):
return select_comments_from_threads (res)
def select_comments_from_threads (
        response: dict[str, Any],
) -> list[CommentResult]:
    """Pick one thread's comments out of a threads API response.

    Selection order:

    1. The last thread that identifies itself as the main thread
       (``fork == 'main'``, or ``'main'`` occurring in its ``label`` or
       ``id``, compared case-insensitively), provided it has comments.
    2. Otherwise the thread with the most comments (the earliest such
       thread wins a tie).

    The chosen comments are de-duplicated by their ``no`` field (a later
    entry replaces an earlier one) and returned in ascending ``no`` order.

    Args:
        response: Decoded JSON body; expected shape is
            ``{'data': {'threads': [{'comments': [...], ...}, ...]}}``.

    Returns:
        The selected comments, or an empty list when nothing usable is
        found.  Malformed input never raises: a missing or ``null``
        ``data``/``threads`` node, non-dict threads, non-list ``comments``,
        non-dict comment entries and entries whose ``no`` is not an integer
        are all skipped.
    """
    # ``dict.get (key, {})`` only covers a *missing* key; an explicit JSON
    # null would come back as None, so check each level's type instead.
    data = response.get ('data') if isinstance (response, dict) else None
    threads = data.get ('threads') if isinstance (data, dict) else None
    if not isinstance (threads, list):
        return []

    main_comments: list[CommentResult] = []
    fallback_comments: list[CommentResult] = []
    for thread in threads:
        if not isinstance (thread, dict):
            continue
        comments = thread.get ('comments')
        if not isinstance (comments, list):
            continue

        # Strictly greater: the first of several equally long threads wins.
        if len (comments) > len (fallback_comments):
            fallback_comments = comments

        fork = str (thread.get ('fork', '')).lower ()
        label = str (thread.get ('label', '')).lower ()
        thread_id = str (thread.get ('id', '')).lower ()
        if fork == 'main' or 'main' in label or 'main' in thread_id:
            main_comments = comments

    deduped_comments: dict[int, CommentResult] = {}
    for comment in main_comments or fallback_comments:
        if not isinstance (comment, dict):
            continue
        comment_no = comment.get ('no')
        # bool is a subclass of int; a JSON true/false is not a comment number.
        if isinstance (comment_no, bool) or not isinstance (comment_no, int):
            continue
        deduped_comments[comment_no] = comment

    return [deduped_comments[comment_no]
            for comment_no in sorted (deduped_comments)]
def search_nico_by_tags (
tags: list[str],
) -> list[VideoResult]:
) -> SearchNicoResult:
today = datetime.now ()
url = ('https://snapshot.search.nicovideo.jp'
+ '/api/v2/snapshot/video/contents/search')
result_data: list[VideoResult] = []
result_by_video_code: dict[str, VideoResult] = {}
is_complete = True
to = datetime (2022, 12, 3)
while to <= today:
time.sleep (1.2)
@@ -215,31 +298,67 @@ def search_nico_by_tags (
'startTime'),
'_limit': 100,
'jsonFilter': query_filter }
res = requests.get (url, params = cast (dict[str, int | str], params), timeout = 60).json ()
try:
result_data += res['data']
except KeyError:
pass
response = requests.get (
url,
params = cast (dict[str, int | str], params),
timeout = 60)
response.raise_for_status ()
res = response.json ()
for datum in cast (list[VideoResult], res.get ('data', [])):
result_by_video_code[datum['contentId']] = datum
except (ValueError, requests.RequestException) as exc:
logger.warning ('snapshot fetch failed: %s - %s (%s)',
to.date (),
until.date (),
exc)
is_complete = False
to = until + timedelta (days = 1)
for video in TrackedVideo.get ():
if video.code in map (lambda v: v['contentId'], result_data):
if video.code in result_by_video_code:
continue
try:
video_data = fetch_video_data (video.code)['data']
result_data.append ({
'contentId': video.code,
'userId': video_data['video']['userId'],
'title': video_data['video']['title'],
tracked_video = video
video_data = fetch_video_data (tracked_video.code)['data']
owner = video_data.get ('owner') or {}
video_info = video_data['video']
result_by_video_code[tracked_video.code] = {
'contentId': tracked_video.code,
'userId': owner.get ('id'),
'title': video_info['title'],
'tags': ' '.join (map (lambda t: t['name'],
video_data['tag']['items'])),
'description': video_data['video']['description'],
'viewCounter': video_data['video']['count']['view'],
'startTime': video_data['video']['registeredAt'] })
except Exception:
pass
'description': video_info['description'],
'viewCounter': video_info['count']['view'],
'startTime': video_info['registeredAt'] }
except (KeyError,
TypeError,
ValueError,
requests.RequestException) as exc:
logger.warning ('tracked video fetch failed: %s (%s)', video.code, exc)
is_complete = False
return result_data
return { 'videos': list (result_by_video_code.values ()),
'is_complete': is_complete }
def normalise (
        text: str,
) -> str:
    """Return the canonical form of ``text`` used when comparing names.

    Steps, in order: strip surrounding whitespace, apply Unicode NFKC
    normalisation, run the result through ``jaconv.hira2kata``, then
    lower-case it.
    """
    trimmed = text.strip ()
    compatible = unicodedata.normalize ('NFKC', trimmed)
    katakana = jaconv.hira2kata (compatible)
    return katakana.lower ()
class SearchNicoResult (TypedDict):
    """Return value of ``search_nico_by_tags``."""
    # One entry per distinct video code collected by the search.
    videos: list['VideoResult']
    # False when a snapshot request or a tracked-video lookup failed,
    # i.e. ``videos`` may be missing entries.
    is_complete: bool
class UpdateContext (TypedDict):
    """Inputs handed to ``update_tables`` for one run."""
    # Video records to write; ``update_tables`` iterates over these.
    api_data: list['VideoResult']
    # Comments keyed by video code; read with ``.get (video.code, [])``.
    comments_by_video_code: dict[str, list['CommentResult']]
    # When false, ``update_tables`` logs a warning and skips the
    # soft-delete pass.
    deletable: bool
class VideoSearchParam (TypedDict):
@@ -262,25 +381,12 @@ class VideoResult (TypedDict):
class CommentResult (TypedDict):
id: str
no: int
vposMs: int
body: str
commands: list[str]
userId: str
isPremium: bool
score: int
body: str
postedAt: str
nicoruCount: int
nicoruId: Any
source: str
isMyPost: bool
def normalise (
        s: str,
) -> str:
    """Return the canonical form of ``s`` used when comparing names.

    Applies Unicode NFKC normalisation, runs the result through
    ``jaconv.hira2kata`` and lower-cases it.  Surrounding whitespace is
    kept as-is.
    """
    compatible = unicodedata.normalize ('NFKC', s)
    katakana = jaconv.hira2kata (compatible)
    return katakana.lower ()
vposMs: int
if __name__ == '__main__':