ニジカのスカトロ,ニジカトロ. https://bsky.app/profile/deerjika-bot.bsky.social
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

nico.py 4.7 KiB

2 months ago
2 months ago
2 months ago
2 months ago
2 months ago
2 months ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156
"""
Module for fetching Nijika videos from Niconico.
"""
  4. import os
  5. from datetime import date, timedelta
  6. from typing import TypedDict, cast
  7. import requests
  8. from bs4 import BeautifulSoup
  9. from requests.exceptions import Timeout
  10. from eloquent import DatabaseManager, Model
  11. from db.models import Tag, Video, VideoHistory, VideoTag
  12. CONFIG: dict[str, DbConfig] = { 'mysql': { 'driver': 'mysql',
  13. 'host': 'localhost',
  14. 'database': 'nizika_nico',
  15. 'user': os.environ['MYSQL_USER'],
  16. 'password': os.environ['MYSQL_PASS'],
  17. 'prefix': '' } }
  18. DB = DatabaseManager (CONFIG)
  19. Model.set_connection_resolver (DB)
  20. KIRIBAN_VIEWS_COUNTS: set[int] = { *range (100, 1_000, 100),
  21. *range (1_000, 10_000, 1_000),
  22. *range (10_000, 1_000_001, 10_000),
  23. 194, 245, 510, 114_514, 1_940, 2_450, 5_100, 24_500,
  24. 51_000, 2_424 }
class VideoInfo (TypedDict):
    """Typed dictionary describing one video, as assembled by get_video_info."""
    # Video code the watch page was requested with.
    contentId: str
    # Title derived from the watch page's <title> element.
    title: str
    # Entries of the page's "keywords" meta tag, split on commas.
    tags: list[str]
    # Content of the page's "description" meta tag.
    description: str
  30. def get_latest_deerjika (
  31. ) -> VideoInfo | None:
  32. tag = '伊地知ニジカ OR ぼざろクリーチャーシリーズ'
  33. url = f"https://www.nicovideo.jp/tag/{ tag }"
  34. params = { 'sort': 'f',
  35. 'order': 'd' }
  36. video_info = { }
  37. bs = get_bs_from_url (url, params)
  38. if bs is None:
  39. return None
  40. try:
  41. video = (bs.find_all ('ul', class_ = 'videoListInner')[1]
  42. .find ('li', class_ = 'item'))
  43. video_info['contentId'] = video['data-video-id']
  44. except Exception:
  45. return None
  46. return get_video_info (video_info['contentId'])
  47. def get_bs_from_url (
  48. url: str,
  49. params: dict = { },
  50. ) -> BeautifulSoup | None:
  51. """
  52. URL から BeautifulSoup インスタンス生成
  53. Parameters
  54. ----------
  55. url: str
  56. 捜査する URL
  57. params: dict
  58. パラメータ
  59. Return
  60. ------
  61. BeautifulSoup | None
  62. BeautifulSoup オブゼクト(失敗したら None)
  63. """
  64. try:
  65. req = requests.get (url, params = params, timeout = 60)
  66. except Timeout:
  67. return None
  68. if req.status_code != 200:
  69. return None
  70. req.encoding = req.apparent_encoding
  71. return BeautifulSoup (req.text, 'html.parser')
  72. def get_video_info (
  73. video_code: str,
  74. ) -> VideoInfo | None:
  75. video_info: dict[str, str | list[str]] = { 'contentId': video_code }
  76. bs = get_bs_from_url (f"https://www.nicovideo.jp/watch/{ video_code }")
  77. if bs is None:
  78. return None
  79. try:
  80. title = bs.find ('title')
  81. if title is None:
  82. return None
  83. video_info['title'] = '-'.join (title.text.split ('-')[:(-1)])[:(-1)]
  84. tags: str = bs.find ('meta', attrs = { 'name': 'keywords' }).get ('content') # type: ignore
  85. video_info['tags'] = tags.split (',')
  86. video_info['description'] = bs.find ('meta', attrs = { 'name': 'description' }).get ('content') # type: ignore
  87. except Exception:
  88. return None
  89. return cast (VideoInfo, video_info)
  90. def get_kiriban_list (
  91. base_date: date,
  92. ) -> list[tuple[int, VideoInfo]]:
  93. kiriban_list: list[tuple[int, VideoInfo]] = []
  94. latest_fetched_at = cast (date, VideoHistory.max ('fetched_at'))
  95. previous_fetched_at = cast (date, (VideoHistory
  96. .where ('fetched_at', '<', latest_fetched_at)
  97. .max ('fetched_at')))
  98. for kiriban_views_count in KIRIBAN_VIEWS_COUNTS:
  99. targets = ({ vh.video.code for vh in (VideoHistory
  100. .where ('fetched_at', latest_fetched_at)
  101. .where ('views_count', '>=', kiriban_views_count)
  102. .get ()) }
  103. - { vh.video.code for vh in (VideoHistory
  104. .where ('fetched_at', previous_fetched_at)
  105. .where ('views_count', '>=', kiriban_views_count)
  106. .get ()) })
  107. for code in targets:
  108. video_info = get_video_info (code)
  109. if video_info is not None:
  110. kiriban_list.append ((kiriban_views_count, video_info))
  111. return kiriban_list
class DbConfig (TypedDict):
    """Typed dictionary for one database connection entry of the module-level CONFIG mapping."""
    # Database driver name ('mysql' in CONFIG).
    driver: str
    # Host name of the database server.
    host: str
    # Name of the database to use.
    database: str
    # User name for the connection.
    user: str
    # Password for the connection.
    password: str
    # Value of the 'prefix' setting (an empty string in CONFIG).
    prefix: str