From 54f7f3d8c528df0443c578ab399a24d91f01dda7 Mon Sep 17 00:00:00 2001 From: miteruzo Date: Wed, 8 Jan 2025 22:48:50 +0900 Subject: [PATCH] =?UTF-8?q?#1=20=E3=81=BC=E3=81=A1=E3=81=BC=E3=81=A1?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .gitmodules | 3 + .../queries_to_answers.cpython-313.pyc | Bin 0 -> 4464 bytes db | 1 + main.py | 186 +++++++++++++++--- nizika_nico | 1 + 5 files changed, 162 insertions(+), 29 deletions(-) create mode 100644 __pycache__/queries_to_answers.cpython-313.pyc create mode 120000 db create mode 160000 nizika_nico diff --git a/.gitmodules b/.gitmodules index 6cd7edd..ce4904a 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,6 @@ [submodule "nizika_ai"] path = nizika_ai url = https://git.miteruzo.com/miteruzo/nizika_ai +[submodule "nizika_nico"] + path = nizika_nico + url = https://git.miteruzo.com/miteruzo/nizika_nico.git diff --git a/__pycache__/queries_to_answers.cpython-313.pyc b/__pycache__/queries_to_answers.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cb9fc65f1bb52f2fe87e7a9230d84b1c5cbfcd82 GIT binary patch literal 4464 zcmai1O>7&-6`tiTcb9+qqeT5nv}`w~TgwhC#f@b-uw&U4X^AdZENqa@Voj|mI@D5` zUD>hFz;x~fck z%`_u%=FNL=-n^N4^Ud3*fq)l5iT>k4_CG*bQ!)vEKy^GcSuf3~9H%Y;knDh6xZoRWV%Q}fAe1gOD#=ILiHdNL_TmVo}VrExte z&Xy*4o2s|4H;m*Ouh~d=>Jpo>cSb`=w5!IPXAsJwlo>~qmGIlz^>zgG$3c%4jY%?z z7K?7IHHapS1zB4o*P_pZ&7>>Q@w`N1_acphQJv4rg6QPWVLo@#od`WIJr4^=a*5vO zB@)|wM53>WE;s73R~G(0d(?v;96JLM=!2L`4cN0JhHR?dHb}#Du1$@(V9k4B%{=VL zh|R6HNj|Zo-m|Gaa5Bzm!s*X!M=14zEj<%DGd5m}n=qPr(ehG)8S>7a!)!mc_3N!2 zEwCnsnfQrAIz(fHW>G|#C8AsKTcNkWML1l@t3=2YG(*)4;!$)xryGi9%tg8} z4QA4QUd`q-*}yOut{U7@Ohb{lFDl|`U%)w4C+;`Q3&|_X%c?eygD?mKRv;oCo0>{Qr(;J5e-i_w^MLE$><{0W`2EB^@BOKb{)6TI!=?Vi z8~sO$C;sK@sE93_!QS=Bo0E56*a+?}PF31N>wP!+ipMGg;o@{f@UOjf^{tW+u5|XS zPv4ln?x_rK1Aefl9E>~&My?ALU&n7J-<>S^c2xXbW&c3QKX8x#_`s)=pG=k~PM6?+ zDDl8ARmAqP*i#aF?#4bo@oD^%c&TsZb8+^GfI4??iKshV4(%$1cKs<*-hHsN``|{` z1We^?EBm&UeB16$|1tiDc-gnVMKrNo2_u3wdmF8 zwV6$hUz>h=`khlxVLMR9fl=rH;uo zYEc5<+3V7{RPC8Y&a(x8hnm5A(w%qwh70QecHLFbaGr&f1JTU@c?6v~ngp;;Igs28 zfDfKTNOHf-fnC@etAKA!z!)^>T{TO}fy#sfg|-aA18&0#jA^y`0I=I^y58EF06+j~ zR!O?+a1CI?&l1j1-!v$~81PbKKu<(Gxg{m5$}2b@ap8Ty!2o+^3Xm-*MqyE2D$J{q z4wGxa0o_gF0t^vNtN=8@6o2t9)G}fZg%yqpS^kZu5f?4GJ4>z zFPA4`CHNnWJz<&93Haqsi?wv*J4*=*Jd}l&~5%9 zH;ngyqZucVphLIH%!69Yt2cyFjqhKpcDwjl#>S~e;~`T zGbhLc8m&7Q$+nG9&3ga$T5pmA)vaeC-`a-ixtxF$*o80}2L1w?WQWlL6Jcf}9!#+b zzXTQWOud;=m+7NO0%zmNx#ZC)IWcwm)LGL0TwGzgVj|SkNZ{zH92QVuJf45rh-dpP4 z`&nq;hOlo_?7jWc2R|)u-(T9k|A9DG5jy^l!b5Sa$|CW_|89AZzw0St+^t`TEfAdU z)^BzCg1+5#BmYqq?mLX*{av1cYPW zQ#BKLT{Pp>TrrJr(o(l> z!1rsz8@QdiV}gVkjC3pUDO!E^9EfgYsvBCc17ifBwn8qZ^ZMm2 a1Pxf?{MT1E*xoNV@tqTZQCy!HH~#~pENyoH literal 0 HcmV?d00001 diff --git a/db b/db new file mode 120000 index 0000000..fb0d23d --- /dev/null +++ b/db @@ -0,0 +1 @@ +./nizika_nico/db \ No newline at end of file diff --git a/main.py b/main.py index 7cb0141..b358f9c 100644 --- a/main.py +++ b/main.py @@ -1,57 +1,185 @@ from __future__ import annotations import asyncio -from datetime import date, datetime, time +from datetime import date, datetime, time, timedelta +from typing import TypedDict, cast + +import requests +from bs4 import BeautifulSoup +from requests.exceptions import Timeout + +import queries_to_answers as q2a +from db.models import Video, VideoHistory + +KIRIBAN_VIEWS_COUNTS: list[int] = sorted ({ *range (1_000, 10_000, 1_000), + *range (10_000, 1_000_001, 10_000), + 114_514, 1_940, 2_450, 5_100, + 19_400, 24_500, 51_000, 93_194, 2_424, 242_424, 1_919, + 4_545, 194_245, 245_194, 510_245 }, + reverse = True) + +kiriban_list: list[tuple[int, VideoInfo, datetime]] async def main ( ) -> None: - ... + await asyncio.gather ( + queries_to_answers (), + report_kiriban (), + report_nico (), + update_kiriban_list ()) async def queries_to_answers ( ) -> None: - ... + while True: + q2a.main () + await asyncio.sleep (10) -async def kiriban ( +async def report_kiriban ( ) -> None: - ... - - -async def report_nico ( + while True: + # キリ番祝ひ + ... + # 待ち時間計算 + dt = datetime.now () + d = dt.date () + if dt.hour >= 15: + d += timedelta (days = 1) + td = datetime.combine (d, time (15, 0)) - dt + if kiriban_list: + td /= len (kiriban_list) + await asyncio.sleep (td.total_seconds ()) + + +async def update_kiriban_list ( ) -> None: - ... + while True: + await wait_until (time (15, 0)) + kiriban_list += fetch_kiriban_list (datetime.now ().date ()) + + +def fetch_kiriban_list ( + base_date: date, +) -> list[tuple[int, VideoInfo, datetime]]: + _kiriban_list: list[tuple[int, VideoInfo, datetime]] = [] + + latest_fetched_at = cast (date, (VideoHistory + .where ('fetched_at', '<=', base_date) + .max ('fetched_at'))) + + for kiriban_views_count in KIRIBAN_VIEWS_COUNTS: + targets = { vh.video.code for vh in (VideoHistory + .where ('fetched_at', latest_fetched_at) + .where ('views_count', '>=', kiriban_views_count) + .get ()) } + for code in targets: + if code in [kiriban[1]['contentId'] for kiriban in _kiriban_list]: + continue + previous_views_count: int | None = ( + VideoHistory + .where_has ('video', lambda q: q.where ('code', code)) + .where ('fetched_at', '<', latest_fetched_at) + .max ('views_count')) + if previous_views_count is None: + previous_views_count = 0 + if previous_views_count >= kiriban_views_count: + continue + video_info = fetch_video_info (code) + if video_info is not None: + _kiriban_list.append ((kiriban_views_count, video_info, + cast (Video, Video.where ('code', code).first ()).uploaded_at)) + + return _kiriban_list + + +def fetch_video_info ( + video_code: str, +) -> VideoInfo | None: + video_info: dict[str, str | list[str]] = { 'contentId': video_code } + + bs = create_bs_from_url (f"https://www.nicovideo.jp/watch/{ video_code }") + if bs is None: + return None + + try: + title = bs.find ('title') + if title is None: + return None + video_info['title'] = '-'.join (title.text.split ('-')[:(-1)])[:(-1)] + + tags: str = bs.find ('meta', attrs = { 'name': 'keywords' }).get ('content') # type: ignore + video_info['tags'] = tags.split (',') + + video_info['description'] = bs.find ('meta', attrs = { 'name': 'description' }).get ('content') # type: ignore + except Exception: + return None + + return cast (VideoInfo, video_info) + + +def create_bs_from_url ( + url: str, + params: dict | None = None, +) -> BeautifulSoup | None: + """ + URL から BeautifulSoup インスタンス生成 + + Parameters + ---------- + url: str + 捜査する URL + params: dict + パラメータ + + Return + ------ + BeautifulSoup | None + BeautifulSoup オブゼクト(失敗したら None) + """ + + if params is None: + params = { } + + try: + req = requests.get (url, params = params, timeout = 60) + except Timeout: + return None + + if req.status_code != 200: + return None + + req.encoding = req.apparent_encoding + + return BeautifulSoup (req.text, 'hecoml.parser') -async def schedule_task ( - dt_tuple: tuple[int | None, int | None, int | None, int | None, int | None, int | None], +async def report_nico ( ) -> None: ... -def dt_to_tuple ( - dt: datetime | date | time, -) -> tuple[int | None, int | None, int | None, int | None, int | None, int | None]: - year: int | None = None - month: int | None = None - day: int | None = None - hour: int | None = None - minute: int | None = None - second: int | None = None +async def wait_until ( + t: time, +): + dt = datetime.now () + d = dt.date () + if dt.time () >= t: + d += timedelta (days = 1) + await asyncio.sleep ((datetime.combine (d, t) - dt).total_seconds ()) - if not isinstance (dt, time): - year = dt.year - month = dt.month - day = dt.day - if not isinstance (dt, date): - hour = dt.hour - minute = dt.minute - second = dt.second +class VideoInfo (TypedDict): + contentId: str + title: str + tags: list[str] + description: str - return (year, month, day, hour, minute, second) +kiriban_list = ( + fetch_kiriban_list ((d := datetime.now ()).date () + - timedelta (days = d.hour < 15))) if __name__ == '__main__': asyncio.run (main ()) diff --git a/nizika_nico b/nizika_nico new file mode 160000 index 0000000..b2f5f81 --- /dev/null +++ b/nizika_nico @@ -0,0 +1 @@ +Subproject commit b2f5f81ca8615781d79d807fe92f7824653cea22