ぼざろクリーチャーシリーズ DB 兼 API(自分用)
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

update_db.py 12 KiB

2 months ago
2 months ago
2 months ago
1 month ago
1 month ago
1 month ago
1 month ago
1 month ago
1 month ago
1 month ago
1 month ago
2 months ago
2 months ago
2 months ago
2 months ago
2 months ago

  1. # pylint: disable = missing-class-docstring
  2. # pylint: disable = missing-function-docstring
  3. """
  4. 日次で実行し,ぼざクリ DB を最新に更新する.
  5. """
  6. from __future__ import annotations
  7. import json
  8. import os
  9. import random
  10. import string
  11. import time
  12. import unicodedata
  13. from datetime import date, datetime, timedelta
  14. from typing import Any, TypedDict, cast
  15. import requests
  16. from eloquent import DatabaseManager, Model
  17. config: dict[str, DbConfig] = { 'mysql': { 'driver': 'mysql',
  18. 'host': 'localhost',
  19. 'database': 'nizika_nico',
  20. 'user': os.environ['MYSQL_USER'],
  21. 'password': os.environ['MYSQL_PASS'],
  22. 'prefix': '' } }
  23. db = DatabaseManager (config)
  24. Model.set_connection_resolver (db)
  25. def main (
  26. ) -> None:
  27. now = datetime.now ()
  28. api_data = search_nico_by_tags (['伊地知ニジカ', 'ぼざろクリーチャーシリーズ'])
  29. update_tables (api_data, now)
  30. def update_tables (
  31. api_data: list[VideoResult],
  32. now: datetime,
  33. ) -> None:
  34. alive_video_codes: list[str] = []
  35. for datum in api_data:
  36. tag_names: list[str] = datum['tags'].split ()
  37. video = Video ()
  38. video.code = datum['contentId']
  39. video.title = datum['title']
  40. video.description = datum['description'] or ''
  41. video.uploaded_at = datetime.fromisoformat (datum['startTime'])
  42. video.deleted_at = None
  43. video.upsert ()
  44. alive_video_codes.append (video.code)
  45. video_history = VideoHistory ()
  46. video_history.video_id = video.id
  47. video_history.fetched_at = now
  48. video_history.views_count = datum['viewCounter']
  49. video_history.save ()
  50. video_tags = video.video_tags.where_not_null ('untagged_at').get ()
  51. tag: Tag | None
  52. video_tag: VideoTag | None
  53. for video_tag in video_tags:
  54. tag = video_tag.tag
  55. if (tag is not None
  56. and (normalise (tag.name) not in map (normalise, tag_names))):
  57. video_tag.untagged_at = now
  58. video_tag.save ()
  59. for tag_name in tag_names:
  60. tag = Tag.where ('name', tag_name).first ()
  61. if tag is None:
  62. tag = Tag ()
  63. tag.name = tag_name
  64. tag.save ()
  65. video_tag = (VideoTag.where ('video_id', video.id)
  66. .where ('tag_id', tag.id)
  67. .where_null ('untagged_at')
  68. .first ())
  69. if video_tag is None:
  70. video_tag = VideoTag ()
  71. video_tag.video_id = video.id
  72. video_tag.tag_id = tag.id
  73. video_tag.tagged_at = now
  74. video_tag.untagged_at = None
  75. video_tag.save ()
  76. for com in fetch_comments (video.code):
  77. user = User.where ('code', com['userId']).first ()
  78. if user is None:
  79. user = User ()
  80. user.code = com['userId']
  81. user.save ()
  82. comment = Comment ()
  83. comment.video_id = video.id
  84. comment.comment_no = com['no']
  85. comment.user_id = user.id
  86. comment.content = com['body']
  87. comment.posted_at = datetime.fromisoformat (com['postedAt'])
  88. comment.nico_count = com['nicoruCount']
  89. comment.vpos_ms = com['vposMs']
  90. comment.upsert ()
  91. # 削除動画
  92. videos = (Video.where_not_in ('code', alive_video_codes)
  93. .where_null ('deleted_at')
  94. .get ())
  95. for video in videos:
  96. if video.code not in alive_video_codes:
  97. video.deleted_at = now
  98. video.save ()
  99. def fetch_comments (
  100. video_code: str,
  101. ) -> list[CommentResult]:
  102. time.sleep (1.2)
  103. headers = { 'X-Frontend-Id': '6',
  104. 'X-Frontend-Version': '0' }
  105. action_track_id = (
  106. ''.join (random.choice (string.ascii_letters + string.digits)
  107. for _ in range (10))
  108. + '_'
  109. + str (random.randrange (10 ** 12, 10 ** 13)))
  110. url = (f"https://www.nicovideo.jp/api/watch/v3_guest/{ video_code }"
  111. + f"?actionTrackId={ action_track_id }")
  112. res = requests.post (url, headers = headers, timeout = 60).json ()
  113. try:
  114. nv_comment = res['data']['comment']['nvComment']
  115. except KeyError:
  116. return []
  117. if nv_comment is None:
  118. return []
  119. headers = { 'X-Frontend-Id': '6',
  120. 'X-Frontend-Version': '0',
  121. 'Content-Type': 'application/json' }
  122. params = { 'params': nv_comment['params'],
  123. 'additionals': { },
  124. 'threadKey': nv_comment['threadKey'] }
  125. url = nv_comment['server'] + '/v1/threads'
  126. res = (requests.post (url, json.dumps (params),
  127. headers = headers,
  128. timeout = 60)
  129. .json ())
  130. try:
  131. return res['data']['threads'][1]['comments']
  132. except (IndexError, KeyError):
  133. return []
  134. def search_nico_by_tag (
  135. tag: str,
  136. ) -> list[VideoResult]:
  137. return search_nico_by_tags ([tag])
  138. def search_nico_by_tags (
  139. tags: list[str],
  140. ) -> list[VideoResult]:
  141. today = datetime.now ()
  142. url = ('https://snapshot.search.nicovideo.jp'
  143. + '/api/v2/snapshot/video/contents/search')
  144. result_data: list[VideoResult] = []
  145. to = datetime (2022, 12, 3)
  146. while to <= today:
  147. time.sleep (1.2)
  148. until = to + timedelta (days = 14)
  149. # pylint: disable = consider-using-f-string
  150. query_filter = json.dumps ({ 'type': 'or',
  151. 'filters': [
  152. { 'type': 'range',
  153. 'field': 'startTime',
  154. 'from': ('%04d-%02d-%02dT00:00:00+09:00'
  155. % (to.year, to.month, to.day)),
  156. 'to': ('%04d-%02d-%02dT23:59:59+09:00'
  157. % (until.year, until.month, until.day)),
  158. 'include_lower': True }] })
  159. params: VideoSearchParam = { 'q': ' OR '.join (tags),
  160. 'targets': 'tagsExact',
  161. '_sort': '-viewCounter',
  162. 'fields': ('contentId,'
  163. 'title,'
  164. 'tags,'
  165. 'description,'
  166. 'viewCounter,'
  167. 'startTime'),
  168. '_limit': 100,
  169. 'jsonFilter': query_filter }
  170. res = requests.get (url, params = cast (dict[str, int | str], params), timeout = 60).json ()
  171. try:
  172. result_data += res['data']
  173. except KeyError:
  174. pass
  175. to = until + timedelta (days = 1)
  176. return result_data
  177. class Comment (Model):
  178. # pylint: disable = too-many-instance-attributes
  179. id: int
  180. video_id: int
  181. comment_no: int
  182. user_id: int
  183. content: str
  184. posted_at: datetime
  185. nico_count: int
  186. vpos_ms: int
  187. __timestamps__ = False
  188. @property
  189. def video (
  190. self,
  191. ) -> Video:
  192. return self.belongs_to (Video)
  193. @property
  194. def user (
  195. self,
  196. ) -> User:
  197. return self.belongs_to (User)
  198. def upsert (
  199. self,
  200. ) -> None:
  201. row = (Comment.where ('video_id', self.video_id)
  202. .where ('comment_no', self.comment_no)
  203. .first ())
  204. if row is not None:
  205. self.id = row.id
  206. self.__exists = True # pylint: disable = unused-private-member
  207. self.save ()
  208. class Tag (Model):
  209. id: int
  210. name: str
  211. __timestamps__ = False
  212. @property
  213. def video_tags (
  214. self,
  215. ) -> VideoTag:
  216. return self.has_many (VideoTag)
  217. class User (Model):
  218. id: int
  219. code: str
  220. __timestamps__ = False
  221. @property
  222. def comments (
  223. self,
  224. ) -> Comment:
  225. return self.has_many (Comment)
  226. class Video (Model):
  227. id: int
  228. code: str
  229. title: str
  230. description: str
  231. uploaded_at: datetime
  232. deleted_at: datetime | None
  233. __timestamps__ = False
  234. @property
  235. def video_histories (
  236. self,
  237. ) -> VideoHistory:
  238. return self.has_many (VideoHistory)
  239. @property
  240. def video_tags (
  241. self,
  242. ) -> VideoTag:
  243. return self.has_many (VideoTag)
  244. @property
  245. def comments (
  246. self,
  247. ) -> Comment:
  248. return self.has_many (Comment)
  249. def upsert (
  250. self,
  251. ) -> None:
  252. row = Video.where ('code', self.code).first ()
  253. if row is not None:
  254. self.id = row.id
  255. self.__exists = True # pylint: disable = unused-private-member
  256. self.save ()
  257. class VideoHistory (Model):
  258. id: int
  259. video_id: int
  260. fetched_at: date
  261. views_count: int
  262. __timestamps__ = False
  263. @property
  264. def video (
  265. self,
  266. ) -> Video:
  267. return self.belongs_to (Video)
  268. def upsert (
  269. self,
  270. ) -> None:
  271. row = (VideoHistory.where ('video_id', self.video_id)
  272. .where ('fetched_at', self.fetched_at)
  273. .first ())
  274. if row is not None:
  275. self.id = row.id
  276. self.__exists = True # pylint: disable = unused-private-member
  277. self.save ()
  278. class VideoTag (Model):
  279. id: int
  280. video_id: int
  281. tag_id: int
  282. tagged_at: date
  283. untagged_at: date | None
  284. __timestamps__ = False
  285. @property
  286. def video (
  287. self,
  288. ) -> Video:
  289. return self.belongs_to (Video)
  290. @property
  291. def tag (
  292. self,
  293. ) -> Tag:
  294. return self.belongs_to (Tag)
  295. def upsert (
  296. self,
  297. ) -> None:
  298. row = (VideoTag.where ('video_id', self.video_id)
  299. .where ('tag_id', self.tag_id)
  300. .first ())
  301. if row is not None:
  302. self.id = row.id
  303. self.__exists = True # pylint: disable = unused-private-member
  304. self.save ()
  305. class DbConfig (TypedDict):
  306. driver: str
  307. host: str
  308. database: str
  309. user: str
  310. password: str
  311. prefix: str
  312. class VideoSearchParam (TypedDict):
  313. q: str
  314. targets: str
  315. _sort: str
  316. fields: str
  317. _limit: int
  318. jsonFilter: str
  319. class VideoResult (TypedDict):
  320. contentId: str
  321. title: str
  322. tags: str
  323. description: str | None
  324. viewCounter: int
  325. startTime: str
  326. class CommentResult (TypedDict):
  327. id: str
  328. no: int
  329. vposMs: int
  330. body: str
  331. commands: list[str]
  332. userId: str
  333. isPremium: bool
  334. score: int
  335. postedAt: str
  336. nicoruCount: int
  337. nicoruId: Any
  338. source: str
  339. isMyPost: bool
  340. def normalise (
  341. s: str,
  342. ) -> str:
  343. return unicodedata.normalize ('NFKC', s).lower ()
# Run the update only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main ()