From d735c935269dafbe48ee4cdd1d216ae506102dc4 Mon Sep 17 00:00:00 2001 From: miteruzo Date: Sat, 16 Aug 2025 04:00:20 +0900 Subject: [PATCH] #2 --- .gitmodules | 3 + main.py | 160 +++++++++++++--------------------------------------- nicolib | 1 + nizika_nico | 2 +- 4 files changed, 45 insertions(+), 121 deletions(-) create mode 160000 nicolib diff --git a/.gitmodules b/.gitmodules index ce4904a..1f0df5d 100644 --- a/.gitmodules +++ b/.gitmodules @@ -4,3 +4,6 @@ [submodule "nizika_nico"] path = nizika_nico url = https://git.miteruzo.com/miteruzo/nizika_nico.git +[submodule "nicolib"] + path = nicolib + url = https://git.miteruzo.com/miteruzo/nicolib.git diff --git a/main.py b/main.py index f1aac9e..4cf8870 100644 --- a/main.py +++ b/main.py @@ -2,15 +2,14 @@ from __future__ import annotations import asyncio import random +from asyncio import Lock from datetime import date, datetime, time, timedelta from typing import TypedDict, cast -import requests -from bs4 import BeautifulSoup -from requests.exceptions import Timeout - +import nicolib import queries_to_answers as q2a from db.models import Comment, Video, VideoHistory +from nicolib import VideoInfo from nizika_ai.consts import Character, GPTModel, QueryType from nizika_ai.models import Query @@ -22,6 +21,7 @@ KIRIBAN_VIEWS_COUNTS: list[int] = sorted ({ *range (1_000, 10_000, 1_000), reverse = True) kiriban_list: list[tuple[int, VideoInfo, datetime]] +lock = Lock () async def main ( @@ -36,20 +36,32 @@ async def main ( async def queries_to_answers ( ) -> None: while True: - q2a.main () + loop = asyncio.get_running_loop () + await loop.run_in_executor (None, q2a.main) await asyncio.sleep (10) async def report_kiriban ( ) -> None: while True: + if not kiriban_list: + await wait_until (time (15, 0)) + continue + # キリ番祝ひ - (views_count, video_info, uploaded_at) = ( - kiriban_list.pop (random.randint (0, len (kiriban_list) - 1))) + async with lock: + (views_count, video_info, uploaded_at) = ( + kiriban_list.pop (random.randint (0, len (kiriban_list) - 1))) + since_posted = datetime.now () - uploaded_at + _days = since_posted.days + _seconds = since_posted.seconds + (_hours, _seconds) = divmod (_seconds, 3600) + (_mins, _seconds) = divmod (_seconds, 60) + video_code = video_info['contentId'] uri = f"https://www.nicovideo.jp/watch/{ video_code }" - (title, description, _) = fetch_embed_info (uri) + (title, description, _) = nicolib.fetch_embed_info (uri) comments = fetch_comments (video_code) popular_comments = sorted (comments, key = lambda c: c.nico_count, @@ -57,7 +69,7 @@ async def report_kiriban ( latest_comments = sorted (comments, key = lambda c: c.posted_at, reverse = True)[:10] - prompt = f"{ since_posted.days }日と{ since_posted.seconds }秒前にニコニコに投稿された『{ video_info['title'] }』という動画が{ views_count }再生を突破しました。\n" + prompt = f"{ _days }日{ _hours }時間{ _mins }分{ _seconds }秒前にニコニコに投稿された『{ video_info['title'] }』という動画が{ views_count }再生を突破しました。\n" prompt += f"コメント数は{ len (comments) }件です。\n" if video_info['tags']: prompt += f"つけられたタグは「{ '」、「'.join (video_info['tags']) }」です。\n" @@ -82,14 +94,18 @@ async def report_kiriban ( query.answered = False query.transfer_data = { 'video_code': video_code } query.save () + # 待ち時間計算 dt = datetime.now () d = dt.date () if dt.hour >= 15: d += timedelta (days = 1) - td = datetime.combine (d, time (15, 0)) - dt - if kiriban_list: - td /= len (kiriban_list) + remain = max (len (kiriban_list), 1) + td = (datetime.combine (d, time (15, 0)) - dt) / remain + # まれに時刻跨ぎでマイナスになるため + if td.total_seconds () < 0: + td = timedelta (seconds = 0) + await asyncio.sleep (td.total_seconds ()) @@ -97,7 +113,17 @@ async def update_kiriban_list ( ) -> None: while True: await wait_until (time (15, 0)) - kiriban_list += fetch_kiriban_list (datetime.now ().date ()) + + new_list = fetch_kiriban_list (datetime.now ().date ()) + if not new_list: + continue + + async with lock: + have = { k[1]['contentId'] for k in kiriban_list } + for item in new_list: + if item[1]['contentId'] not in have: + kiriban_list.append (item) + have.add (item[1]['contentId']) def fetch_kiriban_list ( @@ -126,7 +152,7 @@ def fetch_kiriban_list ( previous_views_count = 0 if previous_views_count >= kiriban_views_count: continue - video_info = fetch_video_info (code) + video_info = nicolib.fetch_video_info (code) if video_info is not None: _kiriban_list.append ((kiriban_views_count, video_info, cast (Video, Video.where ('code', code).first ()).uploaded_at)) @@ -134,105 +160,6 @@ def fetch_kiriban_list ( return _kiriban_list -def fetch_video_info ( - video_code: str, -) -> VideoInfo | None: - video_info: dict[str, str | list[str]] = { 'contentId': video_code } - - bs = create_bs_from_url (f"https://www.nicovideo.jp/watch/{ video_code }") - if bs is None: - return None - - try: - title = bs.find ('title') - if title is None: - return None - video_info['title'] = '-'.join (title.text.split ('-')[:(-1)])[:(-1)] - - tags: str = bs.find ('meta', attrs = { 'name': 'keywords' }).get ('content') # type: ignore - video_info['tags'] = tags.split (',') - - video_info['description'] = bs.find ('meta', attrs = { 'name': 'description' }).get ('content') # type: ignore - except Exception: - return None - - return cast (VideoInfo, video_info) - - -def create_bs_from_url ( - url: str, - params: dict | None = None, -) -> BeautifulSoup | None: - """ - URL から BeautifulSoup インスタンス生成 - - Parameters - ---------- - url: str - 捜査する URL - params: dict - パラメータ - - Return - ------ - BeautifulSoup | None - BeautifulSoup オブゼクト(失敗したら None) - """ - - if params is None: - params = { } - - try: - req = requests.get (url, params = params, timeout = 60) - except Timeout: - return None - - if req.status_code != 200: - return None - - req.encoding = req.apparent_encoding - - return BeautifulSoup (req.text, 'hecoml.parser') - - -def fetch_embed_info ( - url: str, -) -> tuple[str, str, str]: - title: str = '' - description: str = '' - thumbnail: str = '' - - try: - res = requests.get (url, timeout = 60) - except Timeout: - return ('', '', '') - - if res.status_code != 200: - return ('', '', '') - - soup = BeautifulSoup (res.text, 'html.parser') - - tmp = soup.find ('title') - if tmp is not None: - title = tmp.text - - tmp = soup.find ('meta', attrs = { 'name': 'description' }) - if tmp is not None and hasattr (tmp, 'get'): - try: - description = cast (str, tmp.get ('content')) - except Exception: - pass - - tmp = soup.find ('meta', attrs = { 'name': 'thumbnail' }) - if tmp is not None and hasattr (tmp, 'get'): - try: - thumbnail = cast (str, tmp.get ('content')) - except Exception: - pass - - return (title, description, thumbnail) - - def fetch_comments ( video_code: str, ) -> list[Comment]: @@ -257,13 +184,6 @@ async def wait_until ( await asyncio.sleep ((datetime.combine (d, t) - dt).total_seconds ()) -class VideoInfo (TypedDict): - contentId: str - title: str - tags: list[str] - description: str - - kiriban_list = ( fetch_kiriban_list ((d := datetime.now ()).date () - timedelta (days = d.hour < 15))) diff --git a/nicolib b/nicolib new file mode 160000 index 0000000..b7a88cc --- /dev/null +++ b/nicolib @@ -0,0 +1 @@ +Subproject commit b7a88cc774aa7869678c00abf9f93982e5b6cfb9 diff --git a/nizika_nico b/nizika_nico index b2f5f81..baa75d6 160000 --- a/nizika_nico +++ b/nizika_nico @@ -1 +1 @@ -Subproject commit b2f5f81ca8615781d79d807fe92f7824653cea22 +Subproject commit baa75d68ba7150775eb024ebab45f3059411ac70