ぼざろクリーチャーシリーズ DB 兼 API(自分用)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

update_db.py 11 KiB

2 months ago
2 months ago
2 months ago
2 months ago
1 month ago
1 month ago
1 month ago
1 month ago
1 month ago
1 month ago
1 month ago
1 month ago
2 months ago
2 months ago
2 months ago
2 months ago
2 months ago

"""
Run daily to bring the "Bozacri" (Bocchi the Rock! creature series) DB up to date.
"""
  4. from __future__ import annotations
  5. import json
  6. import os
  7. import random
  8. import string
  9. import time
  10. import unicodedata
  11. from dataclasses import dataclass
  12. from datetime import date, datetime, timedelta
  13. from typing import Any, Type, TypedDict, cast
  14. import requests
  15. from eloquent import DatabaseManager, Model
  16. from eloquent.orm.relations.dynamic_property import DynamicProperty
  17. config: dict[str, DbConfig] = { 'mysql': { 'driver': 'mysql',
  18. 'host': 'localhost',
  19. 'database': 'nizika_nico',
  20. 'user': os.environ['MYSQL_USER'],
  21. 'password': os.environ['MYSQL_PASS'],
  22. 'prefix': '' } }
  23. db = DatabaseManager (config)
  24. Model.set_connection_resolver (db)
  25. def main (
  26. ) -> None:
  27. now = datetime.now ()
  28. api_data = search_nico_by_tags (['伊地知ニジカ', 'ぼざろクリーチャーシリーズ'])
  29. update_tables (api_data, now)
  30. def update_tables (
  31. api_data: list[VideoResult],
  32. now: datetime,
  33. ) -> None:
  34. alive_video_codes: list[str] = []
  35. for datum in api_data:
  36. tag_names: list[str] = datum['tags'].split ()
  37. video = Video ()
  38. video.code = datum['contentId']
  39. video.title = datum['title']
  40. video.description = datum['description'] or ''
  41. video.uploaded_at = datetime.fromisoformat (datum['startTime'])
  42. video.deleted_at = None
  43. video.upsert ()
  44. alive_video_codes.append (video.code)
  45. video_history = VideoHistory ()
  46. video_history.video_id = video.id
  47. video_history.fetched_at = now
  48. video_history.views_count = datum['viewCounter']
  49. video_history.save ()
  50. video_tags = video.video_tags.where_not_null ('untagged_at').get ()
  51. for video_tag in video_tags:
  52. tag = video_tag.tag
  53. if (tag is not None
  54. and (normalise (tag.name) not in map (normalise, tag_names))):
  55. video_tag.untagged_at = now
  56. video_tag.save ()
  57. for tag_name in tag_names:
  58. tag = Tag.where ('name', tag_name).first ()
  59. if tag is None:
  60. tag = Tag ()
  61. tag.name = tag_name
  62. tag.save ()
  63. video_tag = (Video.where ('video_id', video.id)
  64. .where ('tag_id', tag.id)
  65. .where_null ('untagged_at')
  66. .first ())
  67. if video_tag is None:
  68. video_tag = VideoTag ()
  69. video_tag.video_id = video.id
  70. video_tag.tag_id = tag.id
  71. video_tag.tagged_at = now
  72. video_tag.untagged_at = None
  73. video_tag.save ()
  74. for com in fetch_comments (video.code):
  75. user = User.where ('code', com['userId']).first ()
  76. if user is None:
  77. user = User ()
  78. user.code = com['userId']
  79. user.save ()
  80. comment = Comment ()
  81. comment.video_id = video.id
  82. comment.comment_no = com['no']
  83. comment.user_id = user.id
  84. comment.content = com['body']
  85. comment.posted_at = datetime.fromisoformat (com['postedAt'])
  86. comment.nico_count = com['nicoruCount']
  87. comment.vpos_ms = com['vposMs']
  88. comment.upsert ()
  89. # 削除動画
  90. videos = (Video.where_not_in ('code', alive_video_codes)
  91. .where_null ('deleted_at')
  92. .get ())
  93. for video in videos:
  94. if video.code not in alive_video_codes:
  95. video.deleted_at = now
  96. video.save ()
  97. def fetch_comments (
  98. video_code: str,
  99. ) -> list[CommentResult]:
  100. time.sleep (1.2)
  101. headers = { 'X-Frontend-Id': '6',
  102. 'X-Frontend-Version': '0' }
  103. action_track_id = (
  104. ''.join (random.choice (string.ascii_letters + string.digits)
  105. for _ in range (10))
  106. + '_'
  107. + str (random.randrange (10 ** 12, 10 ** 13)))
  108. url = (f"https://www.nicovideo.jp/api/watch/v3_guest/{ video_code }"
  109. + f"?actionTrackId={ action_track_id }")
  110. res = requests.post (url, headers = headers, timeout = 60).json ()
  111. try:
  112. nv_comment = res['data']['comment']['nvComment']
  113. except KeyError:
  114. return []
  115. if nv_comment is None:
  116. return []
  117. headers = { 'X-Frontend-Id': '6',
  118. 'X-Frontend-Version': '0',
  119. 'Content-Type': 'application/json' }
  120. params = { 'params': nv_comment['params'],
  121. 'additionals': { },
  122. 'threadKey': nv_comment['threadKey'] }
  123. url = nv_comment['server'] + '/v1/threads'
  124. res = (requests.post (url, json.dumps (params),
  125. headers = headers,
  126. timeout = 60)
  127. .json ())
  128. try:
  129. return res['data']['threads'][1]['comments']
  130. except (IndexError, KeyError):
  131. return []
  132. def search_nico_by_tag (
  133. tag: str,
  134. ) -> list[VideoResult]:
  135. return search_nico_by_tags ([tag])
  136. def search_nico_by_tags (
  137. tags: list[str],
  138. ) -> list[VideoResult]:
  139. today = datetime.now ()
  140. url = ('https://snapshot.search.nicovideo.jp'
  141. + '/api/v2/snapshot/video/contents/search')
  142. result_data: list[VideoResult] = []
  143. to = datetime (2022, 12, 3)
  144. while to <= today:
  145. time.sleep (1.2)
  146. until = to + timedelta (days = 14)
  147. query_filter = json.dumps ({ 'type': 'or',
  148. 'filters': [
  149. { 'type': 'range',
  150. 'field': 'startTime',
  151. 'from': '%04d-%02d-%02dT00:00:00+09:00' % (to.year, to.month, to.day),
  152. 'to': '%04d-%02d-%02dT23:59:59+09:00' % (until.year, until.month, until.day),
  153. 'include_lower': True }] })
  154. params: VideoSearchParam = { 'q': ' OR '.join (tags),
  155. 'targets': 'tagsExact',
  156. '_sort': '-viewCounter',
  157. 'fields': ('contentId,'
  158. 'title,'
  159. 'tags,'
  160. 'description,'
  161. 'viewCounter,'
  162. 'startTime'),
  163. '_limit': 100,
  164. 'jsonFilter': query_filter }
  165. res = requests.get (url, params = cast (dict[str, int | str], params), timeout = 60).json ()
  166. try:
  167. result_data += res['data']
  168. except KeyError:
  169. pass
  170. to = until + timedelta (days = 1)
  171. return result_data
  172. class Comment (Model):
  173. __timestamps__ = False
  174. video_id: int
  175. comment_no: int
  176. user_id: int
  177. content: str
  178. posted_at: datetime
  179. nico_count: int
  180. vpos_ms: int
  181. @property
  182. def video (
  183. self,
  184. ) -> DynamicProperty:
  185. return self.belongs_to (Video)
  186. @property
  187. def user (
  188. self,
  189. ) -> DynamicProperty:
  190. return self.belongs_to (User)
  191. def upsert (
  192. self,
  193. ) -> None:
  194. row = (Comment.where ('video_id', self.video_id)
  195. .where ('comment_no', self.comment_no)
  196. .first ())
  197. if row is not None:
  198. self.id = row.id
  199. self.save ()
  200. class Tag (Model):
  201. __timestamps__ = False
  202. name: str
  203. @property
  204. def video_tags (
  205. self,
  206. ) -> DynamicProperty:
  207. return self.has_many (VideoTag)
  208. class User (Model):
  209. __timestamps__ = False
  210. code: str
  211. @property
  212. def comments (
  213. self,
  214. ) -> DynamicProperty:
  215. return self.has_many (Comment)
  216. class Video (Model):
  217. __timestamps__ = False
  218. code: str
  219. title: str
  220. description: str
  221. uploaded_at: datetime
  222. deleted_at: datetime | None
  223. @property
  224. def video_histories (
  225. self,
  226. ) -> DynamicProperty:
  227. return self.has_many (VideoHistory)
  228. @property
  229. def video_tags (
  230. self,
  231. ) -> DynamicProperty:
  232. return self.has_many (VideoTag)
  233. @property
  234. def comments (
  235. self,
  236. ) -> DynamicProperty:
  237. return self.has_many (Comment)
  238. def upsert (
  239. self,
  240. ) -> None:
  241. row = Video.where ('code', self.code).first ()
  242. if row is not None:
  243. self.id = row.id
  244. self.save ()
  245. class VideoHistory (Model):
  246. __timestamps__ = False
  247. video_id: int
  248. fetched_at: date
  249. views_count: int
  250. @property
  251. def video (
  252. self,
  253. ) -> DynamicProperty:
  254. return self.belongs_to (Video)
  255. def upsert (
  256. self,
  257. ) -> None:
  258. row = (VideoHistory.where ('video_id', self.video_id)
  259. .where ('fetched_at', self.fetched_at)
  260. .first ())
  261. if row is not None:
  262. self.id = row.id
  263. self.save ()
  264. class VideoTag (Model):
  265. __timestamps__ = False
  266. video_id: int
  267. tag_id: int
  268. tagged_at: date
  269. untagged_at: date | None
  270. @property
  271. def video (
  272. self,
  273. ) -> DynamicProperty:
  274. return self.belongs_to (Video)
  275. @property
  276. def tag (
  277. self,
  278. ) -> DynamicProperty:
  279. return self.belongs_to (Tag)
  280. def upsert (
  281. self,
  282. ) -> None:
  283. row = (VideoTag.where ('video_id', self.video_id)
  284. .where ('tag_id', self.tag_id)
  285. .first ())
  286. if row is not None:
  287. self.id = row.id
  288. self.save ()
  289. class DbConfig (TypedDict):
  290. driver: str
  291. host: str
  292. database: str
  293. user: str
  294. password: str
  295. prefix: str
  296. class VideoSearchParam (TypedDict):
  297. q: str
  298. targets: str
  299. _sort: str
  300. fields: str
  301. _limit: int
  302. jsonFilter: str
  303. class VideoResult (TypedDict):
  304. contentId: str
  305. title: str
  306. tags: str
  307. description: str | None
  308. viewCounter: int
  309. startTime: str
  310. class CommentResult (TypedDict):
  311. id: str
  312. no: int
  313. vposMs: int
  314. body: str
  315. commands: list[str]
  316. userId: str
  317. isPremium: bool
  318. score: int
  319. postedAt: str
  320. nicoruCount: int
  321. nicoruId: Any
  322. source: str
  323. isMyPost: bool
  324. def normalise (
  325. s: str,
  326. ) -> str:
  327. return unicodedata.normalize ('NFKC', s).lower ()
  328. if __name__ == '__main__':
  329. main ()