ニジカのスカトロ,ニジカトロ. https://bsky.app/profile/deerjika-bot.bsky.social
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

nico.py 4.7 KiB

2 months ago
3 weeks ago
2 months ago
2 months ago
3 weeks ago
2 months ago
2 months ago
2 months ago
3 weeks ago
3 weeks ago
3 weeks ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158
"""
Module for fetching Nizika videos from Niconico
"""
  4. from __future__ import annotations
  5. import os
  6. from datetime import date, timedelta
  7. from typing import TypedDict, cast
  8. import requests
  9. from bs4 import BeautifulSoup
  10. from requests.exceptions import Timeout
  11. from eloquent import DatabaseManager, Model
  12. from db.models import Tag, Video, VideoHistory, VideoTag
  13. CONFIG: dict[str, DbConfig] = { 'mysql': { 'driver': 'mysql',
  14. 'host': 'localhost',
  15. 'database': 'nizika_nico',
  16. 'user': os.environ['MYSQL_USER'],
  17. 'password': os.environ['MYSQL_PASS'],
  18. 'prefix': '' } }
  19. DB = DatabaseManager (CONFIG)
  20. Model.set_connection_resolver (DB)
  21. KIRIBAN_VIEWS_COUNTS: set[int] = { *range (100, 1_000, 100),
  22. *range (1_000, 10_000, 1_000),
  23. *range (10_000, 1_000_001, 10_000),
  24. 194, 245, 510, 114_514, 1_940, 2_450, 5_100, 24_500,
  25. 51_000, 2_424 }
  26. class VideoInfo (TypedDict):
  27. contentId: str
  28. title: str
  29. tags: list[str]
  30. description: str
  31. def get_latest_deerjika (
  32. ) -> VideoInfo | None:
  33. tag = '伊地知ニジカ OR ぼざろクリーチャーシリーズ'
  34. url = f"https://www.nicovideo.jp/tag/{ tag }"
  35. params = { 'sort': 'f',
  36. 'order': 'd' }
  37. video_info = { }
  38. bs = get_bs_from_url (url, params)
  39. if bs is None:
  40. return None
  41. try:
  42. video = (bs.find_all ('ul', class_ = 'videoListInner')[1]
  43. .find ('li', class_ = 'item'))
  44. video_info['contentId'] = video['data-video-id']
  45. except Exception:
  46. return None
  47. return get_video_info (video_info['contentId'])
  48. def get_bs_from_url (
  49. url: str,
  50. params: dict = { },
  51. ) -> BeautifulSoup | None:
  52. """
  53. URL から BeautifulSoup インスタンス生成
  54. Parameters
  55. ----------
  56. url: str
  57. 捜査する URL
  58. params: dict
  59. パラメータ
  60. Return
  61. ------
  62. BeautifulSoup | None
  63. BeautifulSoup オブゼクト(失敗したら None)
  64. """
  65. try:
  66. req = requests.get (url, params = params, timeout = 60)
  67. except Timeout:
  68. return None
  69. if req.status_code != 200:
  70. return None
  71. req.encoding = req.apparent_encoding
  72. return BeautifulSoup (req.text, 'html.parser')
  73. def get_video_info (
  74. video_code: str,
  75. ) -> VideoInfo | None:
  76. video_info: dict[str, str | list[str]] = { 'contentId': video_code }
  77. bs = get_bs_from_url (f"https://www.nicovideo.jp/watch/{ video_code }")
  78. if bs is None:
  79. return None
  80. try:
  81. title = bs.find ('title')
  82. if title is None:
  83. return None
  84. video_info['title'] = '-'.join (title.text.split ('-')[:(-1)])[:(-1)]
  85. tags: str = bs.find ('meta', attrs = { 'name': 'keywords' }).get ('content') # type: ignore
  86. video_info['tags'] = tags.split (',')
  87. video_info['description'] = bs.find ('meta', attrs = { 'name': 'description' }).get ('content') # type: ignore
  88. except Exception:
  89. return None
  90. return cast (VideoInfo, video_info)
  91. def get_kiriban_list (
  92. base_date: date,
  93. ) -> list[tuple[int, VideoInfo]]:
  94. kiriban_list: list[tuple[int, VideoInfo]] = []
  95. latest_fetched_at = cast (date, VideoHistory.max ('fetched_at'))
  96. previous_fetched_at = cast (date, (VideoHistory
  97. .where ('fetched_at', '<', latest_fetched_at)
  98. .max ('fetched_at')))
  99. for kiriban_views_count in KIRIBAN_VIEWS_COUNTS:
  100. targets = ({ vh.video.code for vh in (VideoHistory
  101. .where ('fetched_at', latest_fetched_at)
  102. .where ('views_count', '>=', kiriban_views_count)
  103. .get ()) }
  104. - { vh.video.code for vh in (VideoHistory
  105. .where ('fetched_at', previous_fetched_at)
  106. .where ('views_count', '>=', kiriban_views_count)
  107. .get ()) })
  108. for code in targets:
  109. video_info = get_video_info (code)
  110. if video_info is not None:
  111. kiriban_list.append ((kiriban_views_count, video_info))
  112. return kiriban_list
  113. class DbConfig (TypedDict):
  114. driver: str
  115. host: str
  116. database: str
  117. user: str
  118. password: str
  119. prefix: str