|
|
@@ -2,19 +2,32 @@ |
|
|
|
Module for fetching Nizika videos from Niconico |
|
|
|
""" |
|
|
|
|
|
|
|
import os |
|
|
|
from datetime import date, timedelta |
|
|
|
from typing import TypedDict, cast |
|
|
|
|
|
|
|
import requests |
|
|
|
from bs4 import BeautifulSoup |
|
|
|
from requests.exceptions import Timeout |
|
|
|
from eloquent import DatabaseManager, Model |
|
|
|
|
|
|
|
from db.models import Video |
|
|
|
from db.models import Tag, Video, VideoHistory, VideoTag |
|
|
|
|
|
|
|
# View-count thresholds that get_kiriban_list () checks for being newly
# reached: every 100 below 1,000, every 1,000 below 10,000, every 10,000
# up to and including 1,000,000, plus a few hand-picked values.
KIRIBAN_VIEWS_COUNTS = (
    set(range(100, 1_000, 100))
    | set(range(1_000, 10_000, 1_000))
    | set(range(10_000, 1_000_001, 10_000))
    | {194, 245, 510, 114_514, 1_940, 2_450, 5_100, 24_500, 51_000, 2_424}
)
|
|
|
# Connection settings handed to DatabaseManager below.  The credentials are
# read from the MYSQL_USER / MYSQL_PASS environment variables, so importing
# this module raises KeyError when either of them is unset.
#
# The annotation is quoted on purpose: DbConfig is defined further down in
# this file, and an unquoted module-level annotation is evaluated at import
# time (unless annotation evaluation is deferred), which would raise
# NameError before DbConfig exists.
CONFIG: 'dict[str, DbConfig]' = {
    'mysql': {
        'driver': 'mysql',
        'host': 'localhost',
        'database': 'nizika_nico',
        'user': os.environ['MYSQL_USER'],
        'password': os.environ['MYSQL_PASS'],
        'prefix': '',
    },
}

# Build the database manager from CONFIG and register it as the connection
# resolver used by the ORM models.
DB = DatabaseManager(CONFIG)
Model.set_connection_resolver(DB)
|
|
|
|
|
|
|
# View-count thresholds that get_kiriban_list () checks for being newly
# reached.  Regular steps first (hundreds, thousands, ten-thousands up to
# and including 1,000,000), then a few hand-picked extra values.
KIRIBAN_VIEWS_COUNTS: set[int] = set(range(100, 1_000, 100)).union(
    range(1_000, 10_000, 1_000),
    range(10_000, 1_000_001, 10_000),
    (194, 245, 510, 114_514, 1_940, 2_450, 5_100, 24_500, 51_000, 2_424),
)
|
|
|
|
|
|
|
|
|
|
|
class VideoInfo (TypedDict): |
|
|
@@ -46,25 +59,7 @@ def get_latest_deerjika ( |
|
|
|
except Exception: |
|
|
|
return None |
|
|
|
|
|
|
|
bs = get_bs_from_url ('https://www.nicovideo.jp/watch/' |
|
|
|
+ video_info['contentId']) |
|
|
|
if bs is None: |
|
|
|
return None |
|
|
|
|
|
|
|
try: |
|
|
|
title = bs.find ('title') |
|
|
|
if title is None: |
|
|
|
return None |
|
|
|
video_info['title'] = '-'.join (title.text.split ('-')[:(-1)])[:(-1)] |
|
|
|
|
|
|
|
tags: str = bs.find ('meta', attrs = { 'name': 'keywords' }).get ('content') # type: ignore |
|
|
|
video_info['tags'] = tags.split (',') |
|
|
|
|
|
|
|
video_info['description'] = bs.find ('meta', attrs = { 'name': 'description' }).get ('content') # type: ignore |
|
|
|
except Exception: |
|
|
|
return None |
|
|
|
|
|
|
|
return cast (VideoInfo, video_info) |
|
|
|
return get_video_info (video_info['contentId']) |
|
|
|
|
|
|
|
|
|
|
|
def get_bs_from_url ( |
|
|
@@ -100,8 +95,62 @@ def get_bs_from_url ( |
|
|
|
return BeautifulSoup (req.text, 'html.parser') |
|
|
|
|
|
|
|
|
|
|
|
def _get_meta_content(bs: 'BeautifulSoup', name: str) -> str | None:
    """Return the ``content`` attribute of the first ``<meta name=...>`` tag.

    Returns None when no such tag exists or when its ``content`` attribute
    is missing or is not a string.
    """
    meta = bs.find('meta', attrs={'name': name})
    if meta is None:
        return None
    content = meta.get('content')  # type: ignore
    return content if isinstance(content, str) else None


def get_video_info(
        video_code: str,
) -> VideoInfo | None:
    """Scrape title, tags and description for one video from its watch page.

    Args:
        video_code: Video code appended to
            ``https://www.nicovideo.jp/watch/`` to build the page URL; it is
            also stored as ``contentId`` in the result.

    Returns:
        A dict with the keys ``contentId``, ``title``, ``tags`` (the
        keywords meta content split on commas) and ``description``, or None
        when the page could not be fetched or any of those pieces is
        missing from it.
    """
    bs = get_bs_from_url(f"https://www.nicovideo.jp/watch/{ video_code }")
    if bs is None:
        return None

    try:
        title = bs.find('title')
        if title is None:
            return None

        tags = _get_meta_content(bs, 'keywords')
        description = _get_meta_content(bs, 'description')
        # A missing description is treated exactly like missing keywords,
        # so the result never carries None in place of a string.
        if tags is None or description is None:
            return None

        video_info: dict[str, str | list[str]] = {
            'contentId': video_code,
            # Drop the last '-'-separated segment of <title> and then one
            # more trailing character (presumably a site-name suffix and
            # the space before its dash -- verify against a live page).
            'title': '-'.join(title.text.split('-')[:-1])[:-1],
            'tags': tags.split(','),
            'description': description,
        }
    except Exception:
        # Scraping is best-effort: any unexpected page shape means
        # "no info" rather than a crash.
        return None

    return cast(VideoInfo, video_info)
|
|
|
|
|
|
|
|
|
|
|
def _max_views_by_code(fetched_at: date, min_views: int) -> dict[str, int]:
    """Map video code to its highest views_count in one fetch snapshot.

    Only history rows with ``views_count >= min_views`` are loaded.
    """
    views_by_code: dict[str, int] = {}
    rows = (VideoHistory
            .where('fetched_at', fetched_at)
            .where('views_count', '>=', min_views)
            .get())
    for vh in rows:
        code = vh.video.code
        views = vh.views_count
        if code not in views_by_code or views_by_code[code] < views:
            views_by_code[code] = views
    return views_by_code


def get_kiriban_list(
        base_date: date,
) -> list[tuple[int, VideoInfo]]:
    """List videos that reached a KIRIBAN_VIEWS_COUNTS threshold most recently.

    A video is reported for a threshold when its views_count is at or above
    that threshold in the latest ``fetched_at`` snapshot of VideoHistory but
    not in the snapshot immediately before it.  When there is no earlier
    snapshot, every video at or above a threshold is reported for it.

    Args:
        base_date: Currently unused; kept so existing callers keep working.

    Returns:
        ``(threshold, video_info)`` tuples.  Videos whose info cannot be
        scraped by get_video_info are left out.  A video that passed
        several thresholds appears once per threshold, and those tuples
        share the same video_info object.
    """
    kiriban_list: list[tuple[int, VideoInfo]] = []
    if not KIRIBAN_VIEWS_COUNTS:
        return kiriban_list

    latest_fetched_at = cast('date | None', VideoHistory.max('fetched_at'))
    if latest_fetched_at is None:
        # No history at all, so nothing can have reached a threshold.
        return kiriban_list

    previous_fetched_at = cast('date | None',
                               (VideoHistory
                                .where('fetched_at', '<', latest_fetched_at)
                                .max('fetched_at')))

    # Load each snapshot once instead of querying per threshold.  Rows below
    # the smallest threshold can never matter, so the database filters them.
    min_views = min(KIRIBAN_VIEWS_COUNTS)
    latest_views = _max_views_by_code(latest_fetched_at, min_views)
    previous_views = ({} if previous_fetched_at is None
                      else _max_views_by_code(previous_fetched_at, min_views))

    # Scrape each video at most once, even if it passed several thresholds.
    info_cache: dict[str, VideoInfo | None] = {}

    for kiriban_views_count in KIRIBAN_VIEWS_COUNTS:
        targets = sorted(
            code for code, views in latest_views.items()
            if views >= kiriban_views_count
            and (code not in previous_views
                 or previous_views[code] < kiriban_views_count))
        for code in targets:
            if code not in info_cache:
                info_cache[code] = get_video_info(code)
            video_info = info_cache[code]
            if video_info is not None:
                kiriban_list.append((kiriban_views_count, video_info))

    return kiriban_list
|
|
|
|
|
|
|
|
|
|
|
class DbConfig(TypedDict):
    """Shape of one connection entry in CONFIG; every value is a string."""

    # Driver name; CONFIG uses 'mysql'.
    driver: str
    # Database server host; CONFIG uses 'localhost'.
    host: str
    # Name of the database to connect to.
    database: str
    # Login credentials; CONFIG reads them from environment variables.
    user: str
    password: str
    # CONFIG sets this to the empty string.
    prefix: str