Mirror of https://github.com/home-assistant/core.git
Add audio to stream (#38846)
* Add audio to stream component

* Use container options to do most fmp4 formatting

* Add test for treatment of different audio inputs

* Add test for treatment of different audio inputs

* pcm_mulaw frames should be s16

* Use seek to get BytesIO length

* Remove unused utcnow

* Remove peek_next_audio_pts

* only demux audio and video packets - ignoring data and subtitle streams

Co-authored-by: Jason Hunter <hunterjm@gmail.com>
parent b0974d89a6
commit dc2d0b9297
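
A minimal sketch (not part of the diff) of the audio gating rule this commit introduces — see create_stream_buffer in the worker.py changes below. The source audio stream is copied into an output only when its codec name is in the provider's supported set; the helper name here is illustrative:

    def should_copy_audio(audio_stream_name, provider_audio_codecs):
        # Mirrors: if audio_stream and audio_stream.name in stream_output.audio_codecs
        return bool(provider_audio_codecs) and audio_stream_name in provider_audio_codecs

    assert should_copy_audio("aac", {"aac", "ac3", "mp3"})
    assert not should_copy_audio("pcm_mulaw", {"aac", "ac3", "mp3"})  # G.711 is not valid in mp4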

homeassistant/components/stream/const.py
@@ -15,7 +15,7 @@ OUTPUT_FORMATS = ["hls"]
 
 FORMAT_CONTENT_TYPE = {"hls": "application/vnd.apple.mpegurl"}
 
-AUDIO_SAMPLE_RATE = 44100
-
 MAX_SEGMENTS = 3  # Max number of segments to keep around
 MIN_SEGMENT_DURATION = 1.5  # Each segment is at least this many seconds
+
+PACKETS_TO_WAIT_FOR_AUDIO = 20  # Some streams have an audio stream with no audio

homeassistant/components/stream/core.py
@@ -2,7 +2,7 @@
 import asyncio
 from collections import deque
 import io
-from typing import Any, List
+from typing import Any, Callable, List
 
 from aiohttp import web
 import attr
@@ -34,7 +34,6 @@ class Segment:
     sequence: int = attr.ib()
     segment: io.BytesIO = attr.ib()
     duration: float = attr.ib()
-    start_pts: tuple = attr.ib()
 
 
 class StreamOutput:
@@ -61,8 +60,8 @@ class StreamOutput:
         return None
 
     @property
-    def audio_codec(self) -> str:
-        """Return desired audio codec."""
+    def audio_codecs(self) -> str:
+        """Return desired audio codecs."""
         return None
 
     @property
@@ -71,8 +70,8 @@ class StreamOutput:
         return None
 
     @property
-    def container_options(self) -> dict:
-        """Return container options."""
+    def container_options(self) -> Callable[[int], dict]:
+        """Return Callable which takes a sequence number and returns container options."""
         return None
 
     @property

homeassistant/components/stream/fmp4utils.py
@@ -5,7 +5,8 @@ import io
 def find_box(segment: io.BytesIO, target_type: bytes, box_start: int = 0) -> int:
     """Find location of first box (or sub_box if box_start provided) of given type."""
     if box_start == 0:
-        box_end = len(segment.getbuffer())
+        box_end = segment.seek(0, io.SEEK_END)
+        segment.seek(0)
         index = 0
     else:
         segment.seek(box_start)
@@ -29,22 +30,9 @@ def get_init(segment: io.BytesIO) -> bytes:
     return segment.read(moof_location)
 
 
-def get_m4s(segment: io.BytesIO, start_pts: tuple, sequence: int) -> bytes:
+def get_m4s(segment: io.BytesIO, sequence: int) -> bytes:
     """Get m4s section from fragmented mp4."""
     moof_location = next(find_box(segment, b"moof"))
     mfra_location = next(find_box(segment, b"mfra"))
-    # adjust mfhd sequence number in moof
-    view = segment.getbuffer()
-    view[moof_location + 20 : moof_location + 24] = sequence.to_bytes(4, "big")
-    # adjust tfdt in video traf
-    traf_finder = find_box(segment, b"traf", moof_location)
-    traf_location = next(traf_finder)
-    tfdt_location = next(find_box(segment, b"tfdt", traf_location))
-    view[tfdt_location + 12 : tfdt_location + 20] = start_pts[0].to_bytes(8, "big")
-    # adjust tfdt in audio traf
-    traf_location = next(traf_finder)
-    tfdt_location = next(find_box(segment, b"tfdt", traf_location))
-    view[tfdt_location + 12 : tfdt_location + 20] = start_pts[1].to_bytes(8, "big")
-    # done adjusting
     segment.seek(moof_location)
     return segment.read(mfra_location - moof_location)
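
A hedged sketch of the box layout find_box walks (assumption: standard ISO BMFF, which fragmented MP4 follows): each box starts with a 4-byte big-endian size followed by a 4-byte type tag, so a scanner hops from box to box by adding the size. The iter_boxes name is illustrative, and 64-bit largesize boxes are ignored for brevity:

    import io

    def iter_boxes(segment: io.BytesIO):
        box_end = segment.seek(0, io.SEEK_END)  # same length trick as the patched find_box
        index = 0
        while index < box_end:
            header = bytes(segment.getbuffer()[index : index + 8])
            if len(header) < 8:
                break
            size = int.from_bytes(header[0:4], "big")
            yield index, header[4:8]
            if size == 0:  # a zero size means the box extends to end of file
                break
            index += size

    # e.g. yields (0, b'ftyp'), ..., (N, b'moof'), ... for a fragmented mp4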

homeassistant/components/stream/hls.py
@@ -1,8 +1,9 @@
 """Provide functionality to stream HLS."""
+from typing import Callable
+
 from aiohttp import web
 
 from homeassistant.core import callback
-from homeassistant.util.dt import utcnow
 
 from .const import FORMAT_CONTENT_TYPE
 from .core import PROVIDERS, StreamOutput, StreamView
@@ -35,7 +36,7 @@ class HlsPlaylistView(StreamView):
             await track.recv()
         headers = {"Content-Type": FORMAT_CONTENT_TYPE["hls"]}
         return web.Response(
-            body=renderer.render(track, utcnow()).encode("utf-8"), headers=headers
+            body=renderer.render(track).encode("utf-8"), headers=headers
         )
 
 
@@ -71,8 +72,7 @@ class HlsSegmentView(StreamView):
             return web.HTTPNotFound()
         headers = {"Content-Type": "video/iso.segment"}
         return web.Response(
-            body=get_m4s(segment.segment, segment.start_pts, int(sequence)),
-            headers=headers,
+            body=get_m4s(segment.segment, int(sequence)), headers=headers,
         )
 
 
@@ -90,11 +90,10 @@ class M3U8Renderer:
             "#EXT-X-VERSION:7",
             f"#EXT-X-TARGETDURATION:{track.target_duration}",
             '#EXT-X-MAP:URI="init.mp4"',
-            "#EXT-X-INDEPENDENT-SEGMENTS",
         ]
 
     @staticmethod
-    def render_playlist(track, start_time):
+    def render_playlist(track):
         """Render playlist."""
         segments = track.segments
 
@@ -114,13 +113,9 @@ class M3U8Renderer:
 
         return playlist
 
-    def render(self, track, start_time):
+    def render(self, track):
         """Render M3U8 file."""
-        lines = (
-            ["#EXTM3U"]
-            + self.render_preamble(track)
-            + self.render_playlist(track, start_time)
-        )
+        lines = ["#EXTM3U"] + self.render_preamble(track) + self.render_playlist(track)
         return "\n".join(lines) + "\n"
 
 
@@ -139,9 +134,9 @@ class HlsStreamOutput(StreamOutput):
         return "mp4"
 
     @property
-    def audio_codec(self) -> str:
-        """Return desired audio codec."""
-        return "aac"
+    def audio_codecs(self) -> str:
+        """Return desired audio codecs."""
+        return {"aac", "ac3", "mp3"}
 
     @property
     def video_codecs(self) -> tuple:
@@ -149,6 +144,10 @@ class HlsStreamOutput(StreamOutput):
         return {"hevc", "h264"}
 
     @property
-    def container_options(self) -> dict:
-        """Return container options."""
-        return {"movflags": "frag_custom+empty_moov+default_base_moof"}
+    def container_options(self) -> Callable[[int], dict]:
+        """Return Callable which takes a sequence number and returns container options."""
+        return lambda sequence: {
+            "movflags": "frag_custom+empty_moov+default_base_moof+skip_sidx+frag_discont",
+            "avoid_negative_ts": "make_non_negative",
+            "fragment_index": str(sequence),
+        }
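
How the worker consumes the new Callable-valued container_options property (see create_stream_buffer in worker.py below); a self-contained sketch, with resolve_options as an illustrative stand-in for the guard in the worker:

    from typing import Callable, Optional

    def hls_container_options(sequence: int) -> dict:
        # the per-segment ffmpeg muxer options returned by HlsStreamOutput
        return {
            "movflags": "frag_custom+empty_moov+default_base_moof+skip_sidx+frag_discont",
            "avoid_negative_ts": "make_non_negative",
            "fragment_index": str(sequence),
        }

    def resolve_options(factory: Optional[Callable[[int], dict]], sequence: int) -> dict:
        # providers without container options fall back to an empty dict
        return factory(sequence) if factory else {}

    assert resolve_options(hls_container_options, 2)["fragment_index"] == "2"
    assert resolve_options(None, 2) == {}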

homeassistant/components/stream/recorder.py
@@ -15,16 +15,17 @@ def async_setup_recorder(hass):
     """Only here so Provider Registry works."""
 
 
-def recorder_save_worker(file_out: str, segments: List[Segment]):
+def recorder_save_worker(file_out: str, segments: List[Segment], container_format: str):
     """Handle saving stream."""
-    first_pts = segments[0].start_pts[0]
-    output = av.open(file_out, "w")
+    first_pts = {"video": None, "audio": None}
+    output = av.open(file_out, "w", format=container_format)
     output_v = None
+    output_a = None
 
     for segment in segments:
         # Seek to beginning and open segment
         segment.segment.seek(0)
-        source = av.open(segment.segment, "r", format="mp4")
+        source = av.open(segment.segment, "r", format=container_format)
         source_v = source.streams.video[0]
 
         # Add output streams
@@ -32,16 +33,18 @@ def recorder_save_worker(file_out: str, segments: List[Segment]):
             output_v = output.add_stream(template=source_v)
             context = output_v.codec_context
             context.flags |= "GLOBAL_HEADER"
+        if not output_a and len(source.streams.audio) > 0:
+            source_a = source.streams.audio[0]
+            output_a = output.add_stream(template=source_a)
 
         # Remux video
-        for packet in source.demux(source_v):
+        for packet in source.demux():
             if packet is not None and packet.dts is not None:
-                if packet.pts < segment.start_pts[0]:
-                    packet.pts += segment.start_pts[0]
-                    packet.dts += segment.start_pts[0]
-                packet.pts -= first_pts
-                packet.dts -= first_pts
-                packet.stream = output_v
+                if first_pts[packet.stream.type] is None:
+                    first_pts[packet.stream.type] = packet.pts
+                packet.pts -= first_pts[packet.stream.type]
+                packet.dts -= first_pts[packet.stream.type]
+                packet.stream = output_v if packet.stream.type == "video" else output_a
                 output.mux(packet)
 
         source.close()
@@ -70,9 +73,9 @@ class RecorderOutput(StreamOutput):
         return "mp4"
 
     @property
-    def audio_codec(self) -> str:
+    def audio_codecs(self) -> str:
         """Return desired audio codec."""
-        return "aac"
+        return {"aac", "ac3", "mp3"}
 
     @property
     def video_codecs(self) -> tuple:
@@ -96,7 +99,7 @@ class RecorderOutput(StreamOutput):
         thread = threading.Thread(
             name="recorder_save_worker",
             target=recorder_save_worker,
-            args=(self.video_path, self._segments),
+            args=(self.video_path, self._segments, self.format),
         )
         thread.start()
 
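
A sketch of the per-stream timestamp normalization recorder_save_worker now performs (pure Python, illustrative values): the first pts seen for each stream type becomes that stream's zero point, so audio and video are rebased independently when segments are concatenated:

    first_pts = {"video": None, "audio": None}

    def normalize(stream_type: str, pts: int) -> int:
        # mirrors the first_pts bookkeeping in the remux loop above
        if first_pts[stream_type] is None:
            first_pts[stream_type] = pts
        return pts - first_pts[stream_type]

    assert normalize("video", 90000) == 0
    assert normalize("video", 93003) == 3003
    assert normalize("audio", 44100) == 0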

homeassistant/components/stream/worker.py
@@ -1,54 +1,37 @@
 """Provides the worker thread needed for processing streams."""
-from fractions import Fraction
+from collections import deque
 import io
 import logging
 
 import av
 
-from .const import AUDIO_SAMPLE_RATE, MIN_SEGMENT_DURATION
+from .const import MIN_SEGMENT_DURATION, PACKETS_TO_WAIT_FOR_AUDIO
 from .core import Segment, StreamBuffer
 
 _LOGGER = logging.getLogger(__name__)
 
 
-def generate_audio_frame():
-    """Generate a blank audio frame."""
-
-    audio_frame = av.AudioFrame(format="dbl", layout="mono", samples=1024)
-    # audio_bytes = b''.join(b'\x00\x00\x00\x00\x00\x00\x00\x00'
-    #                        for i in range(0, 1024))
-    audio_bytes = b"\x00\x00\x00\x00\x00\x00\x00\x00" * 1024
-    audio_frame.planes[0].update(audio_bytes)
-    audio_frame.sample_rate = AUDIO_SAMPLE_RATE
-    audio_frame.time_base = Fraction(1, AUDIO_SAMPLE_RATE)
-    return audio_frame
-
-
-def create_stream_buffer(stream_output, video_stream, audio_frame):
+def create_stream_buffer(stream_output, video_stream, audio_stream, sequence):
     """Create a new StreamBuffer."""
 
-    a_packet = None
     segment = io.BytesIO()
+    container_options = (
+        stream_output.container_options(sequence)
+        if stream_output.container_options
+        else {}
+    )
     output = av.open(
         segment,
         mode="w",
         format=stream_output.format,
-        container_options={
-            "video_track_timescale": str(int(1 / video_stream.time_base)),
-            **(stream_output.container_options or {}),
-        },
+        container_options=container_options,
     )
     vstream = output.add_stream(template=video_stream)
     # Check if audio is requested
     astream = None
-    if stream_output.audio_codec:
-        astream = output.add_stream(stream_output.audio_codec, AUDIO_SAMPLE_RATE)
-        # Need to do it multiple times for some reason
-        while not a_packet:
-            a_packets = astream.encode(audio_frame)
-            if a_packets:
-                a_packet = a_packets[0]
-    return (a_packet, StreamBuffer(segment, output, vstream, astream))
+    if audio_stream and audio_stream.name in stream_output.audio_codecs:
+        astream = output.add_stream(template=audio_stream)
+    return StreamBuffer(segment, output, vstream, astream)
 
 
 def stream_worker(hass, stream, quit_event):
@@ -60,32 +43,133 @@
     except (KeyError, IndexError):
         _LOGGER.error("Stream has no video")
         return
+    try:
+        audio_stream = container.streams.audio[0]
+    except (KeyError, IndexError):
+        audio_stream = None
 
-    audio_frame = generate_audio_frame()
-
-    first_packet = True
-    # Holds the buffers for each stream provider
-    outputs = {}
-    # Keep track of the number of segments we've processed
-    sequence = 1
-    # Holds the generated silence that needs to be muxed into the output
-    audio_packets = {}
-    # The presentation timestamp of the first video packet we receive
-    first_pts = 0
-    # The decoder timestamp of the latest packet we processed
+    # The presentation timestamps of the first packet in each stream we receive
+    # Use to adjust before muxing or outputting, but we don't adjust internally
+    first_pts = {}
+    # The decoder timestamps of the latest packet in each stream we processed
    last_dts = None
     # Keep track of consecutive packets without a dts to detect end of stream.
     last_packet_was_without_dts = False
-    # The pts at the beginning of the segment
-    segment_start_v_pts = 0
-    segment_start_a_pts = 0
+    # Holds the buffers for each stream provider
+    outputs = None
+    # Keep track of the number of segments we've processed
+    sequence = 0
+    # The video pts at the beginning of the segment
+    segment_start_pts = None
+    # Because of problems 1 and 2 below, we need to store the first few packets and replay them
+    initial_packets = deque()
+
+    # Have to work around two problems with RTSP feeds in ffmpeg
+    # 1 - first frame has bad pts/dts https://trac.ffmpeg.org/ticket/5018
+    # 2 - seeking can be problematic https://trac.ffmpeg.org/ticket/7815
+
+    def peek_first_pts():
+        nonlocal first_pts, audio_stream
+
+        def empty_stream_dict():
+            return {
+                video_stream: None,
+                **({audio_stream: None} if audio_stream else {}),
+            }
+
+        try:
+            first_packet = empty_stream_dict()
+            first_pts = empty_stream_dict()
+            # Get to first video keyframe
+            while first_packet[video_stream] is None:
+                packet = next(container.demux())
+                if packet.stream == video_stream and packet.is_keyframe:
+                    first_packet[video_stream] = packet
+                    initial_packets.append(packet)
+            # Get first_pts from subsequent frame to first keyframe
+            while any(
+                [pts is None for pts in {**first_packet, **first_pts}.values()]
+            ) and (len(initial_packets) < PACKETS_TO_WAIT_FOR_AUDIO):
+                packet = next(container.demux((video_stream, audio_stream)))
+                if (
+                    first_packet[packet.stream] is None
+                ):  # actually video already found above so only for audio
+                    if packet.is_keyframe:
+                        first_packet[packet.stream] = packet
+                    else:  # Discard leading non-keyframes
+                        continue
+                else:  # This is the second frame to calculate first_pts from
+                    if first_pts[packet.stream] is None:
+                        first_pts[packet.stream] = packet.dts - packet.duration
+                        first_packet[packet.stream].pts = first_pts[packet.stream]
+                        first_packet[packet.stream].dts = first_pts[packet.stream]
+                initial_packets.append(packet)
+            if audio_stream and first_packet[audio_stream] is None:
+                _LOGGER.warning(
+                    "Audio stream not found"
+                )  # Some streams declare an audio stream and never send any packets
+                del first_pts[audio_stream]
+                audio_stream = None
+
+        except (av.AVError, StopIteration) as ex:
+            # End of stream, clear listeners and stop thread
+            for fmt, _ in outputs.items():
+                hass.loop.call_soon_threadsafe(stream.outputs[fmt].put, None)
+            _LOGGER.error(
+                "Error demuxing stream while finding first packet: %s", str(ex)
+            )
+            quit_event.set()
+
+    def initialize_segment(video_pts):
+        """Reset some variables and initialize outputs for each segment."""
+        nonlocal outputs, sequence, segment_start_pts
+        # Clear outputs and increment sequence
+        outputs = {}
+        sequence += 1
+        segment_start_pts = video_pts
+        for stream_output in stream.outputs.values():
+            if video_stream.name not in stream_output.video_codecs:
+                continue
+            buffer = create_stream_buffer(
+                stream_output, video_stream, audio_stream, sequence
+            )
+            outputs[stream_output.name] = (
+                buffer,
+                {video_stream: buffer.vstream, audio_stream: buffer.astream},
+            )
+
+    def mux_video_packet(packet):
+        # adjust pts and dts before muxing
+        packet.pts -= first_pts[video_stream]
+        packet.dts -= first_pts[video_stream]
+        # mux packets to each buffer
+        for buffer, output_streams in outputs.values():
+            # Assign the packet to the new stream & mux
+            packet.stream = output_streams[video_stream]
+            buffer.output.mux(packet)
+
+    def mux_audio_packet(packet):
+        # almost the same as muxing video but add extra check
+        # adjust pts and dts before muxing
+        packet.pts -= first_pts[audio_stream]
+        packet.dts -= first_pts[audio_stream]
+        for buffer, output_streams in outputs.values():
+            # Assign the packet to the new stream & mux
+            if output_streams.get(audio_stream):
+                packet.stream = output_streams[audio_stream]
+                buffer.output.mux(packet)
+
+    peek_first_pts()
+    last_dts = {k: v - 1 for k, v in first_pts.items()}
+    initialize_segment(first_pts[video_stream])
 
     while not quit_event.is_set():
         try:
-            packet = next(container.demux(video_stream))
+            if len(initial_packets) > 0:
+                packet = initial_packets.popleft()
+            else:
+                packet = next(container.demux((video_stream, audio_stream)))
             if packet.dts is None:
-                if first_packet:
-                    continue
                 _LOGGER.error("Stream packet without dts detected, skipping...")
                 # Allow a single packet without dts before terminating the stream.
                 if last_packet_was_without_dts:
@@ -101,101 +185,35 @@
             _LOGGER.error("Error demuxing stream: %s", str(ex))
             break
 
-        # Skip non monotonically increasing dts in feed
-        if not first_packet and last_dts >= packet.dts:
+        # Discard packet if dts is not monotonic
+        if packet.dts <= last_dts[packet.stream]:
             continue
 
-        last_dts = packet.dts
-
-        # Reset timestamps from a 0 time base for this stream
-        packet.dts -= first_pts
-        packet.pts -= first_pts
-
-        # Reset segment on keyframe after we reach desired segment duration
-        if (
-            packet.is_keyframe
-            and (packet.pts - segment_start_v_pts) * packet.time_base
-            >= MIN_SEGMENT_DURATION
-        ):
-            # Calculate the segment duration by multiplying the difference of the next and the current
-            # keyframe presentation timestamps by the time base, which gets us total seconds.
-            segment_duration = (packet.pts - segment_start_v_pts) * packet.time_base
-            # Save segment to outputs
-            for fmt, buffer in outputs.items():
-                buffer.output.close()
-                del audio_packets[buffer.astream]
-                if stream.outputs.get(fmt):
-                    hass.loop.call_soon_threadsafe(
-                        stream.outputs[fmt].put,
-                        Segment(
-                            sequence,
-                            buffer.segment,
-                            segment_duration,
-                            (segment_start_v_pts, segment_start_a_pts),
-                        ),
-                    )
-
-            # Clear outputs and increment sequence
-            outputs = {}
-            if not first_packet:
-                sequence += 1
-            segment_start_v_pts = packet.pts
-            segment_start_a_pts = int(
-                packet.pts * packet.time_base * AUDIO_SAMPLE_RATE
-            )
-
-            # Initialize outputs
-            for stream_output in stream.outputs.values():
-                if video_stream.name not in stream_output.video_codecs:
-                    continue
-
-                a_packet, buffer = create_stream_buffer(
-                    stream_output, video_stream, audio_frame
-                )
-                audio_packets[buffer.astream] = a_packet
-                outputs[stream_output.name] = buffer
-
-        # First video packet tends to have a weird dts/pts
-        if first_packet:
-            # If we are attaching to a live stream that does not reset
-            # timestamps for us, we need to do it ourselves by recording
-            # the first presentation timestamp and subtracting it from
-            # subsequent packets we receive.
-            if (packet.pts * packet.time_base) > 1:
-                first_pts = packet.pts
-                packet.dts = 0
-                packet.pts = 0
-            first_packet = False
-
-        # Store packets on each output
-        for buffer in outputs.values():
-            # Check if the format requires audio
-            if audio_packets.get(buffer.astream):
-                a_packet = audio_packets[buffer.astream]
-                a_time_base = a_packet.time_base
-
-                # Determine video start timestamp and duration
-                video_start = packet.pts * packet.time_base
-                video_duration = packet.duration * packet.time_base
-
-                if packet.is_keyframe:
-                    # Set first audio packet in sequence to equal video pts
-                    a_packet.pts = int(video_start / a_time_base)
-                    a_packet.dts = int(video_start / a_time_base)
-
-                # Determine target end timestamp for audio
-                target_pts = int((video_start + video_duration) / a_time_base)
-                while a_packet.pts < target_pts:
-                    # Mux audio packet and adjust points until target hit
-                    buffer.output.mux(a_packet)
-                    a_packet.pts += a_packet.duration
-                    a_packet.dts += a_packet.duration
-                audio_packets[buffer.astream] = a_packet
-
-            # Assign the video packet to the new stream & mux
-            packet.stream = buffer.vstream
-            buffer.output.mux(packet)
+        # Check for end of segment
+        if packet.stream == video_stream and packet.is_keyframe:
+            segment_duration = (packet.pts - segment_start_pts) * packet.time_base
+            if segment_duration >= MIN_SEGMENT_DURATION:
+                # Save segment to outputs
+                for fmt, (buffer, _) in outputs.items():
+                    buffer.output.close()
+                    if stream.outputs.get(fmt):
+                        hass.loop.call_soon_threadsafe(
+                            stream.outputs[fmt].put,
+                            Segment(sequence, buffer.segment, segment_duration,),
+                        )
+                # Reinitialize
+                initialize_segment(packet.pts)
+
+        # Update last_dts processed
+        last_dts[packet.stream] = packet.dts
+        # mux video packets immediately, save audio packets to be muxed all at once
+        if packet.stream == video_stream:
+            mux_video_packet(packet)  # mutates packet timestamps
+        else:
+            mux_audio_packet(packet)  # mutates packet timestamps
 
     # Close stream
-    for buffer in outputs.values():
+    for buffer, _ in outputs.values():
         buffer.output.close()
     container.close()
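
The segment cut rule in the reworked stream_worker, as a runnable sketch (the 90 kHz time base is an assumption for illustration): a new segment starts only on a video keyframe, and only once the elapsed presentation time reaches MIN_SEGMENT_DURATION:

    from fractions import Fraction

    MIN_SEGMENT_DURATION = 1.5
    TIME_BASE = Fraction(1, 90000)  # typical RTSP video time base; an assumption here

    def should_cut(is_keyframe: bool, pts: int, segment_start_pts: int) -> bool:
        return is_keyframe and (pts - segment_start_pts) * TIME_BASE >= MIN_SEGMENT_DURATION

    assert not should_cut(True, 90000, 0)  # only 1.0 s into the segment
    assert should_cut(True, 140000, 0)     # ~1.56 s elapsed and on a keyframe: cut here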

tests/components/stream/common.py
@@ -1,5 +1,7 @@
 """Collection of test helpers."""
+from fractions import Fraction
 import io
+import logging
 
 import av
 import numpy as np
@@ -7,27 +9,59 @@
 from homeassistant.components.stream import Stream
 from homeassistant.components.stream.const import ATTR_STREAMS, DOMAIN
 
+_LOGGER = logging.getLogger(__name__)
 
-def generate_h264_video():
+AUDIO_SAMPLE_RATE = 8000
+
+
+def generate_h264_video(container_format="mp4", audio_codec=None):
     """
     Generate a test video.
 
     See: http://docs.mikeboers.com/pyav/develop/cookbook/numpy.html
     """
+
+    def generate_audio_frame(pcm_mulaw=False):
+        """Generate a blank audio frame."""
+        if pcm_mulaw:
+            audio_frame = av.AudioFrame(format="s16", layout="mono", samples=1)
+            audio_bytes = b"\x00\x00"
+        else:
+            audio_frame = av.AudioFrame(format="dbl", layout="mono", samples=1024)
+            audio_bytes = b"\x00\x00\x00\x00\x00\x00\x00\x00" * 1024
+        audio_frame.planes[0].update(audio_bytes)
+        audio_frame.sample_rate = AUDIO_SAMPLE_RATE
+        audio_frame.time_base = Fraction(1, AUDIO_SAMPLE_RATE)
+        return audio_frame
+
     duration = 5
     fps = 24
     total_frames = duration * fps
 
     output = io.BytesIO()
-    output.name = "test.mp4"
-    container = av.open(output, mode="w")
+    output.name = "test.mov" if container_format == "mov" else "test.mp4"
+    container = av.open(output, mode="w", format=container_format)
 
     stream = container.add_stream("libx264", rate=fps)
     stream.width = 480
     stream.height = 320
     stream.pix_fmt = "yuv420p"
 
+    a_packet = None
+    last_a_dts = -1
+    if audio_codec is not None:
+        if audio_codec == "empty":  # empty we add a stream but don't mux any audio
+            astream = container.add_stream("aac", AUDIO_SAMPLE_RATE)
+        else:
+            astream = container.add_stream(audio_codec, AUDIO_SAMPLE_RATE)
+        # Need to do it multiple times for some reason
+        while not a_packet:
+            a_packets = astream.encode(
+                generate_audio_frame(pcm_mulaw=audio_codec == "pcm_mulaw")
+            )
+            if a_packets:
+                a_packet = a_packets[0]
+
     for frame_i in range(total_frames):
 
         img = np.empty((480, 320, 3))
@@ -42,6 +76,17 @@ def generate_h264_video():
         for packet in stream.encode(frame):
             container.mux(packet)
 
+        if a_packet is not None:
+            a_packet.pts = int(frame_i / (fps * a_packet.time_base))
+            while a_packet.pts * a_packet.time_base * fps < frame_i + 1:
+                a_packet.dts = a_packet.pts
+                if (
+                    a_packet.dts > last_a_dts
+                ):  # avoid writing same dts twice in case of rounding
+                    container.mux(a_packet)
+                    last_a_dts = a_packet.dts
+                a_packet.pts += a_packet.duration
+
     # Flush stream
     for packet in stream.encode():
         container.mux(packet)
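
Why the test helper builds "s16" frames for pcm_mulaw (per the commit message, "pcm_mulaw frames should be s16"): ffmpeg's G.711 mu-law encoder consumes signed 16-bit input samples and compands them to 8-bit on output. A minimal PyAV sketch of such a frame, assuming PyAV is installed:

    import av

    frame = av.AudioFrame(format="s16", layout="mono", samples=1)
    frame.planes[0].update(b"\x00\x00")  # one silent 16-bit sample
    frame.sample_rate = 8000  # matches AUDIO_SAMPLE_RATE in the test helpers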

tests/components/stream/test_recorder.py
@@ -2,6 +2,7 @@
 from datetime import timedelta
 from io import BytesIO
 
+import av
 import pytest
 
 from homeassistant.components.stream.core import Segment
@@ -75,7 +76,45 @@ async def test_recorder_save():
     output.name = "test.mp4"
 
     # Run
-    recorder_save_worker(output, [Segment(1, source, 4, (360000, 176400))])
+    recorder_save_worker(output, [Segment(1, source, 4)], "mp4")
 
     # Assert
     assert output.getvalue()
+
+
+@pytest.mark.skip("Flaky in CI")
+async def test_record_stream_audio(hass, hass_client):
+    """
+    Test treatment of different audio inputs.
+
+    Record stream output should have an audio channel when input has
+    a valid codec and audio packets and no audio channel otherwise.
+    """
+    await async_setup_component(hass, "stream", {"stream": {}})
+
+    for a_codec, expected_audio_streams in (
+        ("aac", 1),  # aac is a valid mp4 codec
+        ("pcm_mulaw", 0),  # G.711 is not a valid mp4 codec
+        ("empty", 0),  # audio stream with no packets
+        (None, 0),  # no audio stream
+    ):
+        with patch("homeassistant.components.stream.recorder.recorder_save_worker"):
+            # Setup demo track
+            source = generate_h264_video(
+                container_format="mov", audio_codec=a_codec
+            )  # mov can store PCM
+            stream = preload_stream(hass, source)
+            recorder = stream.add_provider("recorder")
+            stream.start()
+
+            while True:
+                segment = await recorder.recv()
+                if not segment:
+                    break
+                last_segment = segment
+
+            result = av.open(last_segment.segment, "r", format="mp4")
+
+            assert len(result.streams.audio) == expected_audio_streams
+            result.close()
+            stream.stop()