scene_manager.py 48 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060
  1. # -*- coding: utf-8 -*-
  2. #
  3. # PySceneDetect: Python-Based Video Scene Detector
  4. # -------------------------------------------------------------------
  5. # [ Site: https://scenedetect.com ]
  6. # [ Docs: https://scenedetect.com/docs/ ]
  7. # [ Github: https://github.com/Breakthrough/PySceneDetect/ ]
  8. #
  9. # Copyright (C) 2014-2024 Brandon Castellano <http://www.bcastell.com>.
  10. # PySceneDetect is licensed under the BSD 3-Clause License; see the
  11. # included LICENSE file, or visit one of the above pages for details.
  12. #
  13. """``scenedetect.scene_manager`` Module
  14. This module implements :class:`SceneManager`, coordinates running a
  15. :mod:`SceneDetector <scenedetect.detectors>` over the frames of a video
  16. (:mod:`VideoStream <scenedetect.video_stream>`). Video decoding is done in a separate thread to
  17. improve performance.
  18. This module also contains other helper functions (e.g. :func:`save_images`) which can be used to
  19. process the resulting scene list.
  20. ===============================================================
  21. Usage
  22. ===============================================================
  23. The following example shows basic usage of a :class:`SceneManager`:
  24. .. code:: python
  25. from scenedetect import open_video, SceneManager, ContentDetector
  26. video = open_video(video_path)
  27. scene_manager = SceneManager()
  28. scene_manager.add_detector(ContentDetector())
  29. # Detect all scenes in video from current position to end.
  30. scene_manager.detect_scenes(video)
  31. # `get_scene_list` returns a list of start/end timecode pairs
  32. # for each scene that was found.
  33. scenes = scene_manager.get_scene_list()
  34. An optional callback can also be invoked on each detected scene, for example:
  35. .. code:: python
  36. from scenedetect import open_video, SceneManager, ContentDetector
  37. # Callback to invoke on the first frame of every new scene detection.
  38. def on_new_scene(frame_img: numpy.ndarray, frame_num: int):
  39. print("New scene found at frame %d." % frame_num)
  40. video = open_video(test_video_file)
  41. scene_manager = SceneManager()
  42. scene_manager.add_detector(ContentDetector())
  43. scene_manager.detect_scenes(video=video, callback=on_new_scene)
  44. To use a `SceneManager` with a webcam/device or existing `cv2.VideoCapture` device, use the
  45. :class:`VideoCaptureAdapter <scenedetect.backends.opencv.VideoCaptureAdapter>` instead of
  46. `open_video`.
  47. =======================================================================
  48. Storing Per-Frame Statistics
  49. =======================================================================
  50. `SceneManager` can use an optional
  51. :class:`StatsManager <scenedetect.stats_manager.StatsManager>` to save frame statistics to disk:
  52. .. code:: python
  53. from scenedetect import open_video, ContentDetector, SceneManager, StatsManager
  54. video = open_video(test_video_file)
  55. scene_manager = SceneManager(stats_manager=StatsManager())
  56. scene_manager.add_detector(ContentDetector())
  57. scene_manager.detect_scenes(video=video)
  58. scene_list = scene_manager.get_scene_list()
  59. print_scenes(scene_list=scene_list)
  60. # Save per-frame statistics to disk.
  61. scene_manager.stats_manager.save_to_csv(csv_file=STATS_FILE_PATH)
  62. The statsfile can be used to find a better threshold for certain inputs, or perform statistical
  63. analysis of the video.
  64. """
  65. import csv
  66. from enum import Enum
  67. from typing import Iterable, List, Tuple, Optional, Dict, Callable, Union, TextIO
  68. import threading
  69. import queue
  70. import logging
  71. import math
  72. import sys
  73. import cv2
  74. import numpy as np
  75. from scenedetect._thirdparty.simpletable import (SimpleTableCell, SimpleTableImage, SimpleTableRow,
  76. SimpleTable, HTMLPage)
  77. from scenedetect.platform import (tqdm, get_and_create_path, get_cv2_imwrite_params, Template)
  78. from scenedetect.frame_timecode import FrameTimecode
  79. from scenedetect.video_stream import VideoStream
  80. from scenedetect.scene_detector import SceneDetector, SparseSceneDetector
  81. from scenedetect.stats_manager import StatsManager, FrameMetricRegistered
logger = logging.getLogger('pyscenedetect')

# TODO: This value can and should be tuned for performance improvements as much as possible,
# until accuracy falls, on a large enough dataset. This has yet to be done, but the current
# value doesn't seem to have caused any issues at least.
DEFAULT_MIN_WIDTH: int = 256
"""The default minimum width a frame will be downscaled to when calculating a downscale factor."""

MAX_FRAME_QUEUE_LENGTH: int = 4
"""Maximum number of decoded frames which can be buffered while waiting to be processed."""

MAX_FRAME_SIZE_ERRORS: int = 16
"""Maximum number of frame size error messages that can be logged."""

# NOTE(review): the %d placeholder is presumably filled with the running cut count — confirm
# against the detect_scenes progress-bar update code (not visible in this chunk).
PROGRESS_BAR_DESCRIPTION = ' Detected: %d | Progress'
"""Template to use for progress bar."""
class Interpolation(Enum):
    """Interpolation method used for image resizing. Based on constants defined in OpenCV.

    Each member's ``value`` is the corresponding ``cv2.INTER_*`` constant, so it can be
    passed directly as the ``interpolation`` argument of ``cv2.resize``.
    """
    NEAREST = cv2.INTER_NEAREST
    """Nearest neighbor interpolation."""
    LINEAR = cv2.INTER_LINEAR
    """Bilinear interpolation."""
    CUBIC = cv2.INTER_CUBIC
    """Bicubic interpolation."""
    AREA = cv2.INTER_AREA
    """Pixel area relation resampling. Provides moire'-free downscaling."""
    LANCZOS4 = cv2.INTER_LANCZOS4
    """Lanczos interpolation over 8x8 neighborhood."""
  106. def compute_downscale_factor(frame_width: int, effective_width: int = DEFAULT_MIN_WIDTH) -> int:
  107. """Get the optimal default downscale factor based on a video's resolution (currently only
  108. the width in pixels is considered).
  109. The resulting effective width of the video will be between frame_width and 1.5 * frame_width
  110. pixels (e.g. if frame_width is 200, the range of effective widths will be between 200 and 300).
  111. Arguments:
  112. frame_width: Actual width of the video frame in pixels.
  113. effective_width: Desired minimum width in pixels.
  114. Returns:
  115. int: The default downscale factor to use to achieve at least the target effective_width.
  116. """
  117. assert not (frame_width < 1 or effective_width < 1)
  118. if frame_width < effective_width:
  119. return 1
  120. return frame_width // effective_width
  121. def get_scenes_from_cuts(
  122. cut_list: Iterable[FrameTimecode],
  123. start_pos: Union[int, FrameTimecode],
  124. end_pos: Union[int, FrameTimecode],
  125. base_timecode: Optional[FrameTimecode] = None,
  126. ) -> List[Tuple[FrameTimecode, FrameTimecode]]:
  127. """Returns a list of tuples of start/end FrameTimecodes for each scene based on a
  128. list of detected scene cuts/breaks.
  129. This function is called when using the :meth:`SceneManager.get_scene_list` method.
  130. The scene list is generated from a cutting list (:meth:`SceneManager.get_cut_list`),
  131. noting that each scene is contiguous, starting from the first to last frame of the input.
  132. If `cut_list` is empty, the resulting scene will span from `start_pos` to `end_pos`.
  133. Arguments:
  134. cut_list: List of FrameTimecode objects where scene cuts/breaks occur.
  135. base_timecode: The base_timecode of which all FrameTimecodes in the cut_list are based on.
  136. num_frames: The number of frames, or FrameTimecode representing duration, of the video that
  137. was processed (used to generate last scene's end time).
  138. start_frame: The start frame or FrameTimecode of the cut list. Used to generate the first
  139. scene's start time.
  140. base_timecode: [DEPRECATED] DO NOT USE. For backwards compatibility only.
  141. Returns:
  142. List of tuples in the form (start_time, end_time), where both start_time and
  143. end_time are FrameTimecode objects representing the exact time/frame where each
  144. scene occupies based on the input cut_list.
  145. """
  146. # TODO(v0.7): Use the warnings module to turn this into a warning.
  147. if base_timecode is not None:
  148. logger.error('`base_timecode` argument is deprecated has no effect.')
  149. # Scene list, where scenes are tuples of (Start FrameTimecode, End FrameTimecode).
  150. scene_list = []
  151. if not cut_list:
  152. scene_list.append((start_pos, end_pos))
  153. return scene_list
  154. # Initialize last_cut to the first frame we processed,as it will be
  155. # the start timecode for the first scene in the list.
  156. last_cut = start_pos
  157. for cut in cut_list:
  158. scene_list.append((last_cut, cut))
  159. last_cut = cut
  160. # Last scene is from last cut to end of video.
  161. scene_list.append((last_cut, end_pos))
  162. return scene_list
  163. def write_scene_list(output_csv_file: TextIO,
  164. scene_list: Iterable[Tuple[FrameTimecode, FrameTimecode]],
  165. include_cut_list: bool = True,
  166. cut_list: Optional[Iterable[FrameTimecode]] = None) -> None:
  167. """Writes the given list of scenes to an output file handle in CSV format.
  168. Arguments:
  169. output_csv_file: Handle to open file in write mode.
  170. scene_list: List of pairs of FrameTimecodes denoting each scene's start/end FrameTimecode.
  171. include_cut_list: Bool indicating if the first row should include the timecodes where
  172. each scene starts. Should be set to False if RFC 4180 compliant CSV output is required.
  173. cut_list: Optional list of FrameTimecode objects denoting the cut list (i.e. the frames
  174. in the video that need to be split to generate individual scenes). If not specified,
  175. the cut list is generated using the start times of each scene following the first one.
  176. """
  177. csv_writer = csv.writer(output_csv_file, lineterminator='\n')
  178. # If required, output the cutting list as the first row (i.e. before the header row).
  179. if include_cut_list:
  180. csv_writer.writerow(
  181. ["Timecode List:"] +
  182. cut_list if cut_list else [start.get_timecode() for start, _ in scene_list[1:]])
  183. csv_writer.writerow([
  184. "Scene Number", "Start Frame", "Start Timecode", "Start Time (seconds)", "End Frame",
  185. "End Timecode", "End Time (seconds)", "Length (frames)", "Length (timecode)",
  186. "Length (seconds)"
  187. ])
  188. for i, (start, end) in enumerate(scene_list):
  189. duration = end - start
  190. csv_writer.writerow([
  191. '%d' % (i + 1),
  192. '%d' % (start.get_frames() + 1),
  193. start.get_timecode(),
  194. '%.3f' % start.get_seconds(),
  195. '%d' % end.get_frames(),
  196. end.get_timecode(),
  197. '%.3f' % end.get_seconds(),
  198. '%d' % duration.get_frames(),
  199. duration.get_timecode(),
  200. '%.3f' % duration.get_seconds()
  201. ])
def write_scene_list_html(output_html_filename,
                          scene_list,
                          cut_list=None,
                          css=None,
                          css_class='mytable',
                          image_filenames=None,
                          image_width=None,
                          image_height=None):
    """Writes the given list of scenes to an output file handle in html format.

    Arguments:
        output_html_filename: filename of output html file
        scene_list: List of pairs of FrameTimecodes denoting each scene's start/end FrameTimecode.
        cut_list: Optional list of FrameTimecode objects denoting the cut list (i.e. the frames
            in the video that need to be split to generate individual scenes). If not passed,
            the start times of each scene (besides the 0th scene) is used instead.
        css: String containing all the css information for the resulting html page.
        css_class: String containing the named css class
        image_filenames: dict where key i contains a list with n elements (filenames of
            the n saved images from that scene)
        image_width: Optional desired width of images in table in pixels
        image_height: Optional desired height of images in table in pixels
    """
    # Fall back to a default inline stylesheet when the caller does not provide one.
    # NOTE(review): "width=100%;" below looks like it should be "width:100%;" — left
    # unchanged since it is runtime output, not a comment.
    if not css:
        css = """
        table.mytable {
            font-family: times;
            font-size:12px;
            color:#000000;
            border-width: 1px;
            border-color: #eeeeee;
            border-collapse: collapse;
            background-color: #ffffff;
            width=100%;
            max-width:550px;
            table-layout:fixed;
        }
        table.mytable th {
            border-width: 1px;
            padding: 8px;
            border-style: solid;
            border-color: #eeeeee;
            background-color: #e6eed6;
            color:#000000;
        }
        table.mytable td {
            border-width: 1px;
            padding: 8px;
            border-style: solid;
            border-color: #eeeeee;
        }
        #code {
            display:inline;
            font-family: courier;
            color: #3d9400;
        }
        #string {
            display:inline;
            font-weight: bold;
        }
        """
    # Output Timecode list. If no explicit cut list was given, derive it from the start
    # times of every scene after the first (mirrors write_scene_list above).
    timecode_table = SimpleTable(
        [["Timecode List:"] +
         (cut_list if cut_list else [start.get_timecode() for start, _ in scene_list[1:]])],
        css_class=css_class)
    # Output list of scenes
    header_row = [
        "Scene Number", "Start Frame", "Start Timecode", "Start Time (seconds)", "End Frame",
        "End Timecode", "End Time (seconds)", "Length (frames)", "Length (timecode)",
        "Length (seconds)"
    ]
    for i, (start, end) in enumerate(scene_list):
        duration = end - start
        # One row per scene; fields match the CSV output of write_scene_list.
        row = SimpleTableRow([
            '%d' % (i + 1),
            '%d' % (start.get_frames() + 1),
            start.get_timecode(),
            '%.3f' % start.get_seconds(),
            '%d' % end.get_frames(),
            end.get_timecode(),
            '%.3f' % end.get_seconds(),
            '%d' % duration.get_frames(),
            duration.get_timecode(),
            '%.3f' % duration.get_seconds()
        ])
        # Append any saved thumbnails for this scene as extra cells on the row.
        if image_filenames:
            for image in image_filenames[i]:
                row.add_cell(
                    SimpleTableCell(
                        SimpleTableImage(image, width=image_width, height=image_height)))
        # The first row creates the table (attaching the header); later rows are appended.
        if i == 0:
            scene_table = SimpleTable(rows=[row], header_row=header_row, css_class=css_class)
        else:
            scene_table.add_row(row=row)
    # Write html file
    page = HTMLPage()
    page.add_table(timecode_table)
    page.add_table(scene_table)
    page.css = css
    page.save(output_html_filename)
  302. #
  303. # TODO(v1.0): Refactor to take a SceneList object; consider moving this and save scene list
  304. # to a better spot, or just move them to scene_list.py.
  305. #
def save_images(scene_list: List[Tuple[FrameTimecode, FrameTimecode]],
                video: VideoStream,
                num_images: int = 3,
                frame_margin: int = 1,
                image_extension: str = 'jpg',
                encoder_param: int = 95,
                image_name_template: str = '$VIDEO_NAME-Scene-$SCENE_NUMBER-$IMAGE_NUMBER',
                output_dir: Optional[str] = None,
                show_progress: Optional[bool] = False,
                scale: Optional[float] = None,
                height: Optional[int] = None,
                width: Optional[int] = None,
                interpolation: Interpolation = Interpolation.CUBIC,
                video_manager=None) -> Dict[int, List[str]]:
    """Save a set number of images from each scene, given a list of scenes
    and the associated video/frame source.

    Arguments:
        scene_list: A list of scenes (pairs of FrameTimecode objects) returned
            from calling a SceneManager's detect_scenes() method.
        video: A VideoStream object corresponding to the scene list.
            Note that the video will be closed/re-opened and seeked through.
        num_images: Number of images to generate for each scene. Minimum is 1.
        frame_margin: Number of frames to pad each scene around the beginning
            and end (e.g. moves the first/last image into the scene by N frames).
            Can set to 0, but will result in some video files failing to extract
            the very last frame.
        image_extension: Type of image to save (must be one of 'jpg', 'png', or 'webp').
        encoder_param: Quality/compression efficiency, based on type of image:
            'jpg' / 'webp': Quality 0-100, higher is better quality. 100 is lossless for webp.
            'png': Compression from 1-9, where 9 achieves best filesize but is slower to encode.
        image_name_template: Template to use for naming image files. Can use the template variables
            $VIDEO_NAME, $SCENE_NUMBER, $IMAGE_NUMBER, $TIMECODE, $FRAME_NUMBER, $TIMESTAMP_MS.
            Should not include an extension.
        output_dir: Directory to output the images into. If not set, the output
            is created in the working directory.
        show_progress: If True, shows a progress bar if tqdm is installed.
        scale: Optional factor by which to rescale saved images. A scaling factor of 1 would
            not result in rescaling. A value < 1 results in a smaller saved image, while a
            value > 1 results in an image larger than the original. This value is ignored if
            either the height or width values are specified.
        height: Optional value for the height of the saved images. Specifying both the height
            and width will resize images to an exact size, regardless of aspect ratio.
            Specifying only height will rescale the image to that number of pixels in height
            while preserving the aspect ratio.
        width: Optional value for the width of the saved images. Specifying both the width
            and height will resize images to an exact size, regardless of aspect ratio.
            Specifying only width will rescale the image to that number of pixels wide
            while preserving the aspect ratio.
        interpolation: Type of interpolation to use when resizing images.
        video_manager: [DEPRECATED] DO NOT USE. For backwards compatibility only.

    Returns:
        Dictionary of the format { scene_num : [image_paths] }, where scene_num is the
        number of the scene in scene_list (starting from 1), and image_paths is a list of
        the paths to the newly saved/created images.

    Raises:
        ValueError: Raised if any arguments are invalid or out of range (e.g.
            if num_images is negative).
    """
    # TODO(v0.7): Add DeprecationWarning that `video_manager` will be removed in v0.8.
    if video_manager is not None:
        logger.error('`video_manager` argument is deprecated, use `video` instead.')
        video = video_manager
    if not scene_list:
        return {}
    if num_images <= 0 or frame_margin < 0:
        raise ValueError()
    # TODO: Validate that encoder_param is within the proper range.
    # Should be between 0 and 100 (inclusive) for jpg/webp, and 1-9 for png.
    imwrite_param = [get_cv2_imwrite_params()[image_extension], encoder_param
                    ] if encoder_param is not None else []
    video.reset()
    # Setup flags and init progress bar if available.
    completed = True
    logger.info('Generating output images (%d per scene)...', num_images)
    progress_bar = None
    if show_progress:
        progress_bar = tqdm(total=len(scene_list) * num_images, unit='images', dynamic_ncols=True)
    filename_template = Template(image_name_template)
    # Zero-padded field widths for $SCENE_NUMBER (min 3 digits) and $IMAGE_NUMBER,
    # sized from the number of scenes/images so filenames sort lexicographically.
    scene_num_format = '%0'
    scene_num_format += str(max(3, math.floor(math.log(len(scene_list), 10)) + 1)) + 'd'
    image_num_format = '%0'
    image_num_format += str(math.floor(math.log(num_images, 10)) + 2) + 'd'
    framerate = scene_list[0][0].framerate
    # Build, per scene, the list of frame timecodes to save: the scene's frame range is
    # split into num_images chunks; the first/last chunk contribute their first/last frame
    # (pulled inward by frame_margin), interior chunks contribute their middle frame.
    # TODO(v1.0): Split up into multiple sub-expressions so auto-formatter works correctly.
    timecode_list = [
        [
            FrameTimecode(int(f), fps=framerate) for f in [
                # middle frames
                a[len(a) // 2] if (0 < j < num_images - 1) or num_images == 1
                # first frame
                else min(a[0] + frame_margin, a[-1]) if j == 0
                # last frame
                else max(a[-1] - frame_margin, a[0])
                # for each evenly-split array of frames in the scene list
                for j, a in enumerate(np.array_split(r, num_images))
            ]
        ] for i, r in enumerate([
            # pad ranges to number of images
            r if 1 + r[-1] - r[0] >= num_images else list(r) + [r[-1]] * (num_images - len(r))
            # create range of frames in scene
            for r in (
                range(
                    start.get_frames(),
                    start.get_frames() + max(
                        1, # guard against zero length scenes
                        end.get_frames() - start.get_frames()))
            # for each scene in scene list
            for start, end in scene_list)
        ])
    ]
    image_filenames = {i: [] for i in range(len(timecode_list))}
    aspect_ratio = video.aspect_ratio
    # Treat a near-square pixel aspect ratio as 1.0 so no correction resize is performed.
    if abs(aspect_ratio - 1.0) < 0.01:
        aspect_ratio = None
    logger.debug('Writing images with template %s', filename_template.template)
    for i, scene_timecodes in enumerate(timecode_list):
        for j, image_timecode in enumerate(scene_timecodes):
            video.seek(image_timecode)
            frame_im = video.read()
            if frame_im is not None:
                # TODO: Allow NUM to be a valid suffix in addition to NUMBER.
                file_path = '%s.%s' % (
                    filename_template.safe_substitute(
                        VIDEO_NAME=video.name,
                        SCENE_NUMBER=scene_num_format % (i + 1),
                        IMAGE_NUMBER=image_num_format % (j + 1),
                        FRAME_NUMBER=image_timecode.get_frames(),
                        TIMESTAMP_MS=int(image_timecode.get_seconds() * 1000),
                        TIMECODE=image_timecode.get_timecode().replace(":", ";")),
                    image_extension,
                )
                image_filenames[i].append(file_path)
                # Stretch horizontally to compensate for non-square pixels.
                # TODO: Combine this resize with the ones below.
                if aspect_ratio is not None:
                    frame_im = cv2.resize(
                        frame_im, (0, 0),
                        fx=aspect_ratio,
                        fy=1.0,
                        interpolation=interpolation.value)
                frame_height = frame_im.shape[0]
                frame_width = frame_im.shape[1]
                # Figure out what kind of resizing needs to be done
                if height or width:
                    if height and not width:
                        factor = height / float(frame_height)
                        width = int(factor * frame_width)
                    if width and not height:
                        factor = width / float(frame_width)
                        height = int(factor * frame_height)
                    # NOTE(review): the derived width/height overwrite the parameters and thus
                    # persist for every subsequent frame and scene — confirm this is intended.
                    assert height > 0 and width > 0
                    frame_im = cv2.resize(
                        frame_im, (width, height), interpolation=interpolation.value)
                elif scale:
                    frame_im = cv2.resize(
                        frame_im, (0, 0), fx=scale, fy=scale, interpolation=interpolation.value)
                cv2.imwrite(get_and_create_path(file_path, output_dir), frame_im, imwrite_param)
            else:
                # Read failed: stop processing this scene; remaining images are skipped.
                completed = False
                break
            if progress_bar is not None:
                progress_bar.update(1)
    if progress_bar is not None:
        progress_bar.close()
    if not completed:
        logger.error('Could not generate all output images.')
    return image_filenames
  472. ##
  473. ## SceneManager Class Implementation
  474. ##
  475. class SceneManager:
  476. """The SceneManager facilitates detection of scenes (:meth:`detect_scenes`) on a video
  477. (:class:`VideoStream <scenedetect.video_stream.VideoStream>`) using a detector
  478. (:meth:`add_detector`). Video decoding is done in parallel in a background thread.
  479. """
    def __init__(
        self,
        stats_manager: Optional[StatsManager] = None,
    ):
        """
        Arguments:
            stats_manager: :class:`StatsManager` to bind to this `SceneManager`. Can be
                accessed via the `stats_manager` property of the resulting object to save to disk.
        """
        # Frame numbers of detected cuts, and events emitted by sparse detectors.
        self._cutting_list = []
        self._event_list = []
        # Regular (per-frame) detectors vs. sparse detectors (see add_detector).
        self._detector_list: List[SceneDetector] = []
        self._sparse_detector_list = []
        # TODO(v1.0): This class should own a StatsManager instead of taking an optional one.
        # Expose a new `stats_manager` @property from the SceneManager, and either change the
        # `stats_manager` argument to to `store_stats: bool=False`, or lazy-init one.
        # TODO(v1.0): This class should own a VideoStream as well, instead of passing one
        # to the detect_scenes method. If concatenation is required, it can be implemented as
        # a generic VideoStream wrapper.
        self._stats_manager: Optional[StatsManager] = stats_manager
        # Position of video that was first passed to detect_scenes.
        self._start_pos: Optional[FrameTimecode] = None
        # Position of video on the last frame processed by detect_scenes.
        self._last_pos: Optional[FrameTimecode] = None
        # Size of the decoded frames.
        self._frame_size: Optional[Tuple[int, int]] = None
        # Count of frame-size mismatch errors seen so far (see MAX_FRAME_SIZE_ERRORS).
        self._frame_size_errors: int = 0
        self._base_timecode: Optional[FrameTimecode] = None
        self._downscale: int = 1
        self._auto_downscale: bool = True
        # Interpolation method to use when downscaling. Defaults to linear interpolation
        # as a good balance between quality and performance.
        self._interpolation: Interpolation = Interpolation.LINEAR
        # Boolean indicating if we have only seen EventType.CUT events so far.
        self._only_cuts: bool = True
        # Set by decode thread when an exception occurs.
        self._exception_info = None
        self._stop = threading.Event()
        # Buffer of recent frames for detectors that look back (see event_buffer_length
        # usage in add_detector).
        self._frame_buffer = []
        self._frame_buffer_size = 0
  520. @property
  521. def interpolation(self) -> Interpolation:
  522. """Interpolation method to use when downscaling frames. Must be one of cv2.INTER_*."""
  523. return self._interpolation
  524. @interpolation.setter
  525. def interpolation(self, value: Interpolation):
  526. self._interpolation = value
  527. @property
  528. def stats_manager(self) -> Optional[StatsManager]:
  529. """Getter for the StatsManager associated with this SceneManager, if any."""
  530. return self._stats_manager
  531. @property
  532. def downscale(self) -> int:
  533. """Factor to downscale each frame by. Will always be >= 1, where 1
  534. indicates no scaling. Will be ignored if auto_downscale=True."""
  535. return self._downscale
  536. @downscale.setter
  537. def downscale(self, value: int):
  538. """Set to 1 for no downscaling, 2 for 2x downscaling, 3 for 3x, etc..."""
  539. if value < 1:
  540. raise ValueError("Downscale factor must be a positive integer >= 1!")
  541. if self.auto_downscale:
  542. logger.warning("Downscale factor will be ignored because auto_downscale=True!")
  543. if value is not None and not isinstance(value, int):
  544. logger.warning("Downscale factor will be truncated to integer!")
  545. value = int(value)
  546. self._downscale = value
  547. @property
  548. def auto_downscale(self) -> bool:
  549. """If set to True, will automatically downscale based on video frame size.
  550. Overrides `downscale` if set."""
  551. return self._auto_downscale
  552. @auto_downscale.setter
  553. def auto_downscale(self, value: bool):
  554. self._auto_downscale = value
  555. def add_detector(self, detector: SceneDetector) -> None:
  556. """Add/register a SceneDetector (e.g. ContentDetector, ThresholdDetector) to
  557. run when detect_scenes is called. The SceneManager owns the detector object,
  558. so a temporary may be passed.
  559. Arguments:
  560. detector (SceneDetector): Scene detector to add to the SceneManager.
  561. """
  562. if self._stats_manager is None and detector.stats_manager_required():
  563. # Make sure the lists are empty so that the detectors don't get
  564. # out of sync (require an explicit statsmanager instead)
  565. assert not self._detector_list and not self._sparse_detector_list
  566. self._stats_manager = StatsManager()
  567. detector.stats_manager = self._stats_manager
  568. if self._stats_manager is not None:
  569. self._stats_manager.register_metrics(detector.get_metrics())
  570. if not issubclass(type(detector), SparseSceneDetector):
  571. self._detector_list.append(detector)
  572. else:
  573. self._sparse_detector_list.append(detector)
  574. self._frame_buffer_size = max(detector.event_buffer_length, self._frame_buffer_size)
  575. def get_num_detectors(self) -> int:
  576. """Get number of registered scene detectors added via add_detector. """
  577. return len(self._detector_list)
  578. def clear(self) -> None:
  579. """Clear all cuts/scenes and resets the SceneManager's position.
  580. Any statistics generated are still saved in the StatsManager object passed to the
  581. SceneManager's constructor, and thus, subsequent calls to detect_scenes, using the same
  582. frame source seeked back to the original time (or beginning of the video) will use the
  583. cached frame metrics that were computed and saved in the previous call to detect_scenes.
  584. """
  585. self._cutting_list.clear()
  586. self._event_list.clear()
  587. self._last_pos = None
  588. self._start_pos = None
  589. self._frame_size = None
  590. self.clear_detectors()
  591. def clear_detectors(self) -> None:
  592. """Remove all scene detectors added to the SceneManager via add_detector(). """
  593. self._detector_list.clear()
  594. self._sparse_detector_list.clear()
  595. def get_scene_list(self,
  596. base_timecode: Optional[FrameTimecode] = None,
  597. start_in_scene: bool = False) -> List[Tuple[FrameTimecode, FrameTimecode]]:
  598. """Return a list of tuples of start/end FrameTimecodes for each detected scene.
  599. Arguments:
  600. base_timecode: [DEPRECATED] DO NOT USE. For backwards compatibility.
  601. start_in_scene: Assume the video begins in a scene. This means that when detecting
  602. fast cuts with `ContentDetector`, if no cuts are found, the resulting scene list
  603. will contain a single scene spanning the entire video (instead of no scenes).
  604. When detecting fades with `ThresholdDetector`, the beginning portion of the video
  605. will always be included until the first fade-out event is detected.
  606. Returns:
  607. List of tuples in the form (start_time, end_time), where both start_time and
  608. end_time are FrameTimecode objects representing the exact time/frame where each
  609. detected scene in the video begins and ends.
  610. """
  611. # TODO(v0.7): Replace with DeprecationWarning that `base_timecode` will be removed in v0.8.
  612. if base_timecode is not None:
  613. logger.error('`base_timecode` argument is deprecated and has no effect.')
  614. if self._base_timecode is None:
  615. return []
  616. cut_list = self._get_cutting_list()
  617. scene_list = get_scenes_from_cuts(
  618. cut_list=cut_list, start_pos=self._start_pos, end_pos=self._last_pos + 1)
  619. # If we didn't actually detect any cuts, make sure the resulting scene_list is empty
  620. # unless start_in_scene is True.
  621. if not cut_list and not start_in_scene:
  622. scene_list = []
  623. return sorted(self._get_event_list() + scene_list)
  624. def _get_cutting_list(self) -> List[int]:
  625. """Return a sorted list of unique frame numbers of any detected scene cuts."""
  626. if not self._cutting_list:
  627. return []
  628. assert self._base_timecode is not None
  629. # Ensure all cuts are unique by using a set to remove all duplicates.
  630. return [self._base_timecode + cut for cut in sorted(set(self._cutting_list))]
  631. def _get_event_list(self) -> List[Tuple[FrameTimecode, FrameTimecode]]:
  632. if not self._event_list:
  633. return []
  634. assert self._base_timecode is not None
  635. return [(self._base_timecode + start, self._base_timecode + end)
  636. for start, end in self._event_list]
  637. def _process_frame(self,
  638. frame_num: int,
  639. frame_im: np.ndarray,
  640. callback: Optional[Callable[[np.ndarray, int], None]] = None) -> bool:
  641. """Add any cuts detected with the current frame to the cutting list. Returns True if any new
  642. cuts were detected, False otherwise."""
  643. new_cuts = False
  644. # TODO(#283): This breaks with AdaptiveDetector as cuts differ from the frame number
  645. # being processed. Allow detectors to specify the max frame lookahead they require
  646. # (i.e. any event will never be more than N frames behind the current one).
  647. self._frame_buffer.append(frame_im)
  648. # frame_buffer[-1] is current frame, -2 is one behind, etc
  649. # so index based on cut frame should be [event_frame - (frame_num + 1)]
  650. self._frame_buffer = self._frame_buffer[-(self._frame_buffer_size + 1):]
  651. for detector in self._detector_list:
  652. cuts = detector.process_frame(frame_num, frame_im)
  653. self._cutting_list += cuts
  654. new_cuts = True if cuts else False
  655. if callback:
  656. for cut_frame_num in cuts:
  657. buffer_index = cut_frame_num - (frame_num + 1)
  658. callback(self._frame_buffer[buffer_index], cut_frame_num)
  659. for detector in self._sparse_detector_list:
  660. events = detector.process_frame(frame_num, frame_im)
  661. self._event_list += events
  662. if callback:
  663. for event_start, _ in events:
  664. buffer_index = event_start - (frame_num + 1)
  665. callback(self._frame_buffer[buffer_index], event_start)
  666. return new_cuts
  667. def _post_process(self, frame_num: int) -> None:
  668. """Add remaining cuts to the cutting list, after processing the last frame."""
  669. for detector in self._detector_list:
  670. self._cutting_list += detector.post_process(frame_num)
def stop(self) -> None:
    """Stop the current :meth:`detect_scenes` call, if any. Thread-safe.

    Sets the internal stop event that both the decode thread and the main
    processing loop poll (via ``is_set``), causing them to exit on their
    next iteration.
    """
    self._stop.set()
  674. def detect_scenes(self,
  675. video: VideoStream = None,
  676. duration: Optional[FrameTimecode] = None,
  677. end_time: Optional[FrameTimecode] = None,
  678. frame_skip: int = 0,
  679. show_progress: bool = False,
  680. callback: Optional[Callable[[np.ndarray, int], None]] = None,
  681. frame_source: Optional[VideoStream] = None) -> int:
  682. """Perform scene detection on the given video using the added SceneDetectors, returning the
  683. number of frames processed. Results can be obtained by calling :meth:`get_scene_list` or
  684. :meth:`get_cut_list`.
  685. Video decoding is performed in a background thread to allow scene detection and frame
  686. decoding to happen in parallel. Detection will continue until no more frames are left,
  687. the specified duration or end time has been reached, or :meth:`stop` was called.
  688. Arguments:
  689. video: VideoStream obtained from either `scenedetect.open_video`, or by creating
  690. one directly (e.g. `scenedetect.backends.opencv.VideoStreamCv2`).
  691. duration: Amount of time to detect from current video position. Cannot be
  692. specified if `end_time` is set.
  693. end_time: Time to stop processing at. Cannot be specified if `duration` is set.
  694. frame_skip: Not recommended except for extremely high framerate videos.
  695. Number of frames to skip (i.e. process every 1 in N+1 frames,
  696. where N is frame_skip, processing only 1/N+1 percent of the video,
  697. speeding up the detection time at the expense of accuracy).
  698. `frame_skip` **must** be 0 (the default) when using a StatsManager.
  699. show_progress: If True, and the ``tqdm`` module is available, displays
  700. a progress bar with the progress, framerate, and expected time to
  701. complete processing the video frame source.
  702. callback: If set, called after each scene/event detected.
  703. frame_source: [DEPRECATED] DO NOT USE. For compatibility with previous version.
  704. Returns:
  705. int: Number of frames read and processed from the frame source.
  706. Raises:
  707. ValueError: `frame_skip` **must** be 0 (the default) if the SceneManager
  708. was constructed with a StatsManager object.
  709. """
  710. # TODO(v0.7): Add DeprecationWarning that `frame_source` will be removed in v0.8.
  711. if frame_source is not None:
  712. video = frame_source
  713. # TODO(v0.8): Remove default value for `video` after `frame_source` is removed.
  714. if video is None:
  715. raise TypeError("detect_scenes() missing 1 required positional argument: 'video'")
  716. if frame_skip > 0 and self.stats_manager is not None:
  717. raise ValueError('frame_skip must be 0 when using a StatsManager.')
  718. if duration is not None and end_time is not None:
  719. raise ValueError('duration and end_time cannot be set at the same time!')
  720. # TODO: These checks should be handled by the FrameTimecode constructor.
  721. if duration is not None and isinstance(duration, (int, float)) and duration < 0:
  722. raise ValueError('duration must be greater than or equal to 0!')
  723. if end_time is not None and isinstance(end_time, (int, float)) and end_time < 0:
  724. raise ValueError('end_time must be greater than or equal to 0!')
  725. self._base_timecode = video.base_timecode
  726. # TODO: Figure out a better solution for communicating framerate to StatsManager.
  727. if self._stats_manager is not None:
  728. self._stats_manager._base_timecode = self._base_timecode
  729. start_frame_num: int = video.frame_number
  730. if end_time is not None:
  731. end_time = self._base_timecode + end_time
  732. elif duration is not None:
  733. end_time = (self._base_timecode + duration) + start_frame_num
  734. total_frames = 0
  735. if video.duration is not None:
  736. if end_time is not None and end_time < video.duration:
  737. total_frames = (end_time - start_frame_num)
  738. else:
  739. total_frames = (video.duration.get_frames() - start_frame_num)
  740. # Calculate the desired downscale factor and log the effective resolution.
  741. if self.auto_downscale:
  742. downscale_factor = compute_downscale_factor(frame_width=video.frame_size[0])
  743. else:
  744. downscale_factor = self.downscale
  745. if downscale_factor > 1:
  746. logger.info('Downscale factor set to %d, effective resolution: %d x %d',
  747. downscale_factor, video.frame_size[0] // downscale_factor,
  748. video.frame_size[1] // downscale_factor)
  749. progress_bar = None
  750. if show_progress:
  751. progress_bar = tqdm(
  752. total=int(total_frames),
  753. unit='frames',
  754. desc=PROGRESS_BAR_DESCRIPTION % 0,
  755. dynamic_ncols=True,
  756. )
  757. frame_queue = queue.Queue(MAX_FRAME_QUEUE_LENGTH)
  758. self._stop.clear()
  759. decode_thread = threading.Thread(
  760. target=SceneManager._decode_thread,
  761. args=(self, video, frame_skip, downscale_factor, end_time, frame_queue),
  762. daemon=True)
  763. decode_thread.start()
  764. frame_im = None
  765. logger.info('Detecting scenes...')
  766. while not self._stop.is_set():
  767. next_frame, position = frame_queue.get()
  768. if next_frame is None and position is None:
  769. break
  770. if not next_frame is None:
  771. frame_im = next_frame
  772. new_cuts = self._process_frame(position.frame_num, frame_im, callback)
  773. if progress_bar is not None:
  774. if new_cuts:
  775. progress_bar.set_description(
  776. PROGRESS_BAR_DESCRIPTION % len(self._cutting_list), refresh=False)
  777. progress_bar.update(1 + frame_skip)
  778. if progress_bar is not None:
  779. progress_bar.set_description(
  780. PROGRESS_BAR_DESCRIPTION % len(self._cutting_list), refresh=True)
  781. progress_bar.close()
  782. # Unblock any puts in the decode thread before joining. This can happen if the main
  783. # processing thread stops before the decode thread.
  784. while not frame_queue.empty():
  785. frame_queue.get_nowait()
  786. decode_thread.join()
  787. if self._exception_info is not None:
  788. raise self._exception_info[1].with_traceback(self._exception_info[2])
  789. self._last_pos = video.position
  790. self._post_process(video.position.frame_num)
  791. return video.frame_number - start_frame_num
  792. def _decode_thread(
  793. self,
  794. video: VideoStream,
  795. frame_skip: int,
  796. downscale_factor: int,
  797. end_time: FrameTimecode,
  798. out_queue: queue.Queue,
  799. ):
  800. try:
  801. while not self._stop.is_set():
  802. frame_im = None
  803. # We don't do any kind of locking here since the worst-case of this being wrong
  804. # is that we do some extra work, and this function should never mutate any data
  805. # (all of which should be modified under the GIL).
  806. # TODO(v1.0): This optimization should be removed as it is an uncommon use case and
  807. # greatly increases the complexity of detection algorithms using it.
  808. if self._is_processing_required(video.position.frame_num):
  809. frame_im = video.read()
  810. if frame_im is False:
  811. break
  812. # Verify the decoded frame size against the video container's reported
  813. # resolution, and also verify that consecutive frames have the correct size.
  814. decoded_size = (frame_im.shape[1], frame_im.shape[0])
  815. if self._frame_size is None:
  816. self._frame_size = decoded_size
  817. if video.frame_size != decoded_size:
  818. logger.warn(
  819. f"WARNING: Decoded frame size ({decoded_size}) does not match "
  820. f" video resolution {video.frame_size}, possible corrupt input.")
  821. elif self._frame_size != decoded_size:
  822. self._frame_size_errors += 1
  823. if self._frame_size_errors <= MAX_FRAME_SIZE_ERRORS:
  824. logger.error(
  825. f"ERROR: Frame at {str(video.position)} has incorrect size and "
  826. f"cannot be processed: decoded size = {decoded_size}, "
  827. f"expected = {self._frame_size}. Video may be corrupt.")
  828. if self._frame_size_errors == MAX_FRAME_SIZE_ERRORS:
  829. logger.warn(
  830. f"WARNING: Too many errors emitted, skipping future messages.")
  831. # Skip processing frames that have an incorrect size.
  832. continue
  833. if downscale_factor > 1:
  834. frame_im = cv2.resize(
  835. frame_im, (round(frame_im.shape[1] / downscale_factor),
  836. round(frame_im.shape[0] / downscale_factor)),
  837. interpolation=self._interpolation.value)
  838. else:
  839. if video.read(decode=False) is False:
  840. break
  841. # Set the start position now that we decoded at least the first frame.
  842. if self._start_pos is None:
  843. self._start_pos = video.position
  844. out_queue.put((frame_im, video.position))
  845. if frame_skip > 0:
  846. for _ in range(frame_skip):
  847. if not video.read(decode=False):
  848. break
  849. # End time includes the presentation time of the frame, but the `position`
  850. # property of a VideoStream references the beginning of the frame in time.
  851. if end_time is not None and not (video.position + 1) < end_time:
  852. break
  853. # If *any* exceptions occur, we re-raise them in the main thread so that the caller of
  854. # detect_scenes can handle it.
  855. except KeyboardInterrupt:
  856. logger.debug("Received KeyboardInterrupt.")
  857. self._stop.set()
  858. except BaseException:
  859. logger.critical('Fatal error: Exception raised in decode thread.')
  860. self._exception_info = sys.exc_info()
  861. self._stop.set()
  862. finally:
  863. # Handle case where start position was never set if we did not decode any frames.
  864. if self._start_pos is None:
  865. self._start_pos = video.position
  866. # Make sure main thread stops processing loop.
  867. out_queue.put((None, None))
  868. # pylint: enable=bare-except
  869. #
  870. # Deprecated Methods
  871. #
  872. # pylint: disable=unused-argument
  873. def get_cut_list(self,
  874. base_timecode: Optional[FrameTimecode] = None,
  875. show_warning: bool = True) -> List[FrameTimecode]:
  876. """[DEPRECATED] Return a list of FrameTimecodes of the detected scene changes/cuts.
  877. Unlike get_scene_list, the cutting list returns a list of FrameTimecodes representing
  878. the point in the input video where a new scene was detected, and thus the frame
  879. where the input should be cut/split. The cutting list, in turn, is used to generate
  880. the scene list, noting that each scene is contiguous starting from the first frame
  881. and ending at the last frame detected.
  882. If only sparse detectors are used (e.g. MotionDetector), this will always be empty.
  883. Arguments:
  884. base_timecode: [DEPRECATED] DO NOT USE. For backwards compatibility only.
  885. show_warning: If set to False, suppresses the error from being warned. In v0.7,
  886. this will have no effect and the error will become a Python warning.
  887. Returns:
  888. List of FrameTimecode objects denoting the points in time where a scene change
  889. was detected in the input video, which can also be passed to external tools
  890. for automated splitting of the input into individual scenes.
  891. """
  892. # TODO(v0.7): Use the warnings module to turn this into a warning.
  893. if show_warning:
  894. logger.error('`get_cut_list()` is deprecated and will be removed in a future release.')
  895. return self._get_cutting_list()
  896. def get_event_list(
  897. self,
  898. base_timecode: Optional[FrameTimecode] = None
  899. ) -> List[Tuple[FrameTimecode, FrameTimecode]]:
  900. """[DEPRECATED] DO NOT USE.
  901. Get a list of start/end timecodes of sparse detection events.
  902. Unlike get_scene_list, the event list returns a list of FrameTimecodes representing
  903. the point in the input video where a new scene was detected only by sparse detectors,
  904. otherwise it is the same.
  905. Arguments:
  906. base_timecode: [DEPRECATED] DO NOT USE. For backwards compatibility only.
  907. Returns:
  908. List of pairs of FrameTimecode objects denoting the detected scenes.
  909. """
  910. # TODO(v0.7): Use the warnings module to turn this into a warning.
  911. logger.error('`get_event_list()` is deprecated and will be removed in a future release.')
  912. return self._get_event_list()
  913. # pylint: enable=unused-argument
  914. def _is_processing_required(self, frame_num: int) -> bool:
  915. """True if frame metrics not in StatsManager, False otherwise."""
  916. if self.stats_manager is None:
  917. return True
  918. return all([detector.is_processing_required(frame_num) for detector in self._detector_list])