1#![allow(clippy::single_match)]
2
3use std::borrow::Cow;
4use std::collections::{HashMap, VecDeque};
5use std::fmt::Debug;
6use std::io;
7
8use byteorder::{BigEndian, ReadBytesExt};
9use bytes::{Buf, Bytes};
10use scuffle_amf0::Amf0Value;
11use scuffle_flv::aac::AacPacket;
12use scuffle_flv::audio::{AudioData, AudioDataBody, SoundType};
13use scuffle_flv::av1::Av1Packet;
14use scuffle_flv::avc::AvcPacket;
15use scuffle_flv::hevc::HevcPacket;
16use scuffle_flv::script::ScriptData;
17use scuffle_flv::tag::{FlvTag, FlvTagData};
18use scuffle_flv::video::{EnhancedPacket, FrameType, VideoTagBody, VideoTagHeader};
19use scuffle_mp4::codec::{AudioCodec, VideoCodec};
20use scuffle_mp4::types::ftyp::{FourCC, Ftyp};
21use scuffle_mp4::types::hdlr::{HandlerType, Hdlr};
22use scuffle_mp4::types::mdat::Mdat;
23use scuffle_mp4::types::mdhd::Mdhd;
24use scuffle_mp4::types::mdia::Mdia;
25use scuffle_mp4::types::mfhd::Mfhd;
26use scuffle_mp4::types::minf::Minf;
27use scuffle_mp4::types::moof::Moof;
28use scuffle_mp4::types::moov::Moov;
29use scuffle_mp4::types::mvex::Mvex;
30use scuffle_mp4::types::mvhd::Mvhd;
31use scuffle_mp4::types::smhd::Smhd;
32use scuffle_mp4::types::stbl::Stbl;
33use scuffle_mp4::types::stco::Stco;
34use scuffle_mp4::types::stsc::Stsc;
35use scuffle_mp4::types::stsd::Stsd;
36use scuffle_mp4::types::stsz::Stsz;
37use scuffle_mp4::types::stts::Stts;
38use scuffle_mp4::types::tfdt::Tfdt;
39use scuffle_mp4::types::tfhd::Tfhd;
40use scuffle_mp4::types::tkhd::Tkhd;
41use scuffle_mp4::types::traf::Traf;
42use scuffle_mp4::types::trak::Trak;
43use scuffle_mp4::types::trex::Trex;
44use scuffle_mp4::types::trun::Trun;
45use scuffle_mp4::types::vmhd::Vmhd;
46use scuffle_mp4::BoxType;
47
48mod codecs;
49mod define;
50mod errors;
51
52pub use define::*;
53pub use errors::TransmuxError;
54
55struct Tags {
56 video_sequence_header: Option<VideoSequenceHeader>,
57 audio_sequence_header: Option<AudioSequenceHeader>,
58 scriptdata_tag: Option<HashMap<Cow<'static, str>, Amf0Value<'static>>>,
59}
60
61#[derive(Debug, Clone)]
62pub struct Transmuxer {
63 audio_duration: u64,
66 video_duration: u64,
68 sequence_number: u32,
69 last_video_timestamp: u32,
70 settings: Option<(VideoSettings, AudioSettings)>,
71 tags: VecDeque<FlvTag>,
72}
73
74impl Default for Transmuxer {
75 fn default() -> Self {
76 Self::new()
77 }
78}
79
80impl Transmuxer {
81 pub fn new() -> Self {
82 Self {
83 sequence_number: 1,
84 tags: VecDeque::new(),
85 audio_duration: 0,
86 video_duration: 0,
87 last_video_timestamp: 0,
88 settings: None,
89 }
90 }
91
92 pub fn demux(&mut self, data: Bytes) -> Result<(), TransmuxError> {
94 let mut cursor = io::Cursor::new(data);
95 while cursor.has_remaining() {
96 cursor.read_u32::<BigEndian>()?; if !cursor.has_remaining() {
98 break;
99 }
100
101 let tag = FlvTag::demux(&mut cursor)?;
102 self.tags.push_back(tag);
103 }
104
105 Ok(())
106 }
107
108 pub fn add_tag(&mut self, tag: FlvTag) {
110 self.tags.push_back(tag);
111 }
112
113 pub fn mux(&mut self) -> Result<Option<TransmuxResult>, TransmuxError> {
116 let mut writer = Vec::new();
117
118 let Some((video_settings, _)) = &self.settings else {
119 let Some((video_settings, audio_settings)) = self.init_sequence(&mut writer)? else {
120 if self.tags.len() > 30 {
121 return Err(TransmuxError::NoSequenceHeaders);
123 }
124
125 return Ok(None);
127 };
128
129 self.settings = Some((video_settings.clone(), audio_settings.clone()));
130
131 return Ok(Some(TransmuxResult::InitSegment {
132 data: Bytes::from(writer),
133 audio_settings,
134 video_settings,
135 }));
136 };
137
138 loop {
139 let Some(tag) = self.tags.pop_front() else {
140 return Ok(None);
141 };
142
143 let mdat_data;
144 let total_duration;
145 let trun_sample;
146 let mut is_audio = false;
147 let mut is_keyframe = false;
148
149 let duration =
150 if self.last_video_timestamp == 0 || tag.timestamp_ms == 0 || tag.timestamp_ms < self.last_video_timestamp {
151 1000 } else {
154 let delta = tag.timestamp_ms as f64 - self.last_video_timestamp as f64;
164 let expected_delta = 1000.0 / video_settings.framerate;
165 if (delta - expected_delta).abs() <= 1.0 {
166 1000
167 } else {
168 (delta * video_settings.framerate) as u32
169 }
170 };
171
172 match tag.data {
173 FlvTagData::Audio(AudioData {
174 body: AudioDataBody::Aac(AacPacket::Raw(data)),
175 ..
176 }) => {
177 let (sample, duration) = codecs::aac::trun_sample(&data)?;
178
179 trun_sample = sample;
180 mdat_data = data;
181 total_duration = duration;
182 is_audio = true;
183 }
184 FlvTagData::Video(VideoTagHeader {
185 frame_type,
186 body: VideoTagBody::Avc(AvcPacket::Nalu { composition_time, data }),
187 ..
188 }) => {
189 let composition_time = ((composition_time as f64 * video_settings.framerate) / 1000.0).floor() * 1000.0;
190
191 let sample = codecs::avc::trun_sample(frame_type, composition_time as u32, duration, &data)?;
192
193 trun_sample = sample;
194 total_duration = duration;
195 mdat_data = data;
196
197 is_keyframe = frame_type == FrameType::Keyframe;
198 }
199 FlvTagData::Video(VideoTagHeader {
200 frame_type,
201 body: VideoTagBody::Enhanced(EnhancedPacket::Av1(Av1Packet::Raw(data))),
202 ..
203 }) => {
204 let sample = codecs::av1::trun_sample(frame_type, duration, &data)?;
205
206 trun_sample = sample;
207 total_duration = duration;
208 mdat_data = data;
209
210 is_keyframe = frame_type == FrameType::Keyframe;
211 }
212 FlvTagData::Video(VideoTagHeader {
213 frame_type,
214 body: VideoTagBody::Enhanced(EnhancedPacket::Hevc(HevcPacket::Nalu { composition_time, data })),
215 ..
216 }) => {
217 let composition_time =
218 ((composition_time.unwrap_or_default() as f64 * video_settings.framerate) / 1000.0).floor() * 1000.0;
219
220 let sample = codecs::hevc::trun_sample(frame_type, composition_time as i32, duration, &data)?;
221
222 trun_sample = sample;
223 total_duration = duration;
224 mdat_data = data;
225
226 is_keyframe = frame_type == FrameType::Keyframe;
227 }
228 _ => {
229 continue;
231 }
232 }
233
234 let trafs = {
235 let (main_duration, main_id) = if is_audio {
236 (self.audio_duration, 2)
237 } else {
238 (self.video_duration, 1)
239 };
240
241 let mut traf = Traf::new(
242 Tfhd::new(main_id, None, None, None, None, None),
243 Some(Trun::new(vec![trun_sample], None)),
244 Some(Tfdt::new(main_duration)),
245 );
246 traf.optimize();
247
248 vec![traf]
249 };
250
251 let mut moof = Moof::new(Mfhd::new(self.sequence_number), trafs);
252
253 let moof_size = moof.size();
255
256 let traf = moof.traf.get_mut(0).expect("we just created the moof with a traf");
260
261 let trun = traf.trun.as_mut().expect("we just created the video traf with a trun");
263
264 trun.data_offset = Some(moof_size as i32 + 8);
268
269 moof.mux(&mut writer)?;
271
272 Mdat::new(vec![mdat_data]).mux(&mut writer)?;
274
275 self.sequence_number += 1;
277
278 if is_audio {
279 self.audio_duration += total_duration as u64;
280 return Ok(Some(TransmuxResult::MediaSegment(MediaSegment {
281 data: Bytes::from(writer),
282 ty: MediaType::Audio,
283 keyframe: false,
284 timestamp: self.audio_duration - total_duration as u64,
285 })));
286 } else {
287 self.video_duration += total_duration as u64;
288 self.last_video_timestamp = tag.timestamp_ms;
289 return Ok(Some(TransmuxResult::MediaSegment(MediaSegment {
290 data: Bytes::from(writer),
291 ty: MediaType::Video,
292 keyframe: is_keyframe,
293 timestamp: self.video_duration - total_duration as u64,
294 })));
295 }
296 }
297 }
298
299 fn find_tags(&self) -> Tags {
301 let tags = self.tags.iter();
302 let mut video_sequence_header = None;
303 let mut audio_sequence_header = None;
304 let mut scriptdata_tag = None;
305
306 for tag in tags {
307 if video_sequence_header.is_some() && audio_sequence_header.is_some() && scriptdata_tag.is_some() {
308 break;
309 }
310
311 match &tag.data {
312 FlvTagData::Video(VideoTagHeader {
313 frame_type: _,
314 body: VideoTagBody::Avc(AvcPacket::SequenceHeader(data)),
315 ..
316 }) => {
317 video_sequence_header = Some(VideoSequenceHeader::Avc(data.clone()));
318 }
319 FlvTagData::Video(VideoTagHeader {
320 frame_type: _,
321 body: VideoTagBody::Enhanced(EnhancedPacket::Av1(Av1Packet::SequenceStart(config))),
322 ..
323 }) => {
324 video_sequence_header = Some(VideoSequenceHeader::Av1(config.clone()));
325 }
326 FlvTagData::Video(VideoTagHeader {
327 frame_type: _,
328 body: VideoTagBody::Enhanced(EnhancedPacket::Hevc(HevcPacket::SequenceStart(config))),
329 ..
330 }) => {
331 video_sequence_header = Some(VideoSequenceHeader::Hevc(config.clone()));
332 }
333 FlvTagData::Audio(AudioData {
334 body: AudioDataBody::Aac(AacPacket::SequenceHeader(data)),
335 sound_size,
336 sound_type,
337 ..
338 }) => {
339 audio_sequence_header = Some(AudioSequenceHeader {
340 data: AudioSequenceHeaderData::Aac(data.clone()),
341 sound_size: *sound_size,
342 sound_type: *sound_type,
343 });
344 }
345 FlvTagData::ScriptData(ScriptData { data, name }) => {
346 if name == "@setDataFrame" || name == "onMetaData" {
347 let meta_object = data.iter().find(|v| matches!(v, Amf0Value::Object(_)));
348
349 if let Some(Amf0Value::Object(meta_object)) = meta_object {
350 scriptdata_tag = Some(meta_object.iter().map(|(k, v)| (k.clone(), v.clone())).collect());
351 }
352 }
353 }
354 _ => {}
355 }
356 }
357
358 Tags {
359 video_sequence_header,
360 audio_sequence_header,
361 scriptdata_tag,
362 }
363 }
364
365 fn init_sequence(
367 &mut self,
368 writer: &mut impl io::Write,
369 ) -> Result<Option<(VideoSettings, AudioSettings)>, TransmuxError> {
370 let Tags {
373 video_sequence_header,
374 audio_sequence_header,
375 scriptdata_tag,
376 } = self.find_tags();
377
378 let Some(video_sequence_header) = video_sequence_header else {
379 return Ok(None);
380 };
381 let Some(audio_sequence_header) = audio_sequence_header else {
382 return Ok(None);
383 };
384
385 let video_codec;
386 let audio_codec;
387 let video_width;
388 let video_height;
389 let audio_channels;
390 let audio_sample_rate;
391 let mut video_fps = 0.0;
392
393 let mut estimated_video_bitrate = 0;
394 let mut estimated_audio_bitrate = 0;
395
396 if let Some(scriptdata_tag) = scriptdata_tag {
397 video_fps = scriptdata_tag
398 .get("framerate")
399 .and_then(|v| match v {
400 Amf0Value::Number(v) => Some(*v),
401 _ => None,
402 })
403 .unwrap_or(0.0);
404
405 estimated_video_bitrate = scriptdata_tag
406 .get("videodatarate")
407 .and_then(|v| match v {
408 Amf0Value::Number(v) => Some((*v * 1024.0) as u32),
409 _ => None,
410 })
411 .unwrap_or(0);
412
413 estimated_audio_bitrate = scriptdata_tag
414 .get("audiodatarate")
415 .and_then(|v| match v {
416 Amf0Value::Number(v) => Some((*v * 1024.0) as u32),
417 _ => None,
418 })
419 .unwrap_or(0);
420 }
421
422 let mut compatiable_brands = vec![FourCC::Iso5, FourCC::Iso6];
423
424 let video_stsd_entry = match video_sequence_header {
425 VideoSequenceHeader::Avc(config) => {
426 compatiable_brands.push(FourCC::Avc1);
427 video_codec = VideoCodec::Avc {
428 constraint_set: config.profile_compatibility,
429 level: config.level_indication,
430 profile: config.profile_indication,
431 };
432
433 let (entry, sps) = codecs::avc::stsd_entry(config)?;
434 if sps.frame_rate != 0.0 {
435 video_fps = sps.frame_rate;
436 }
437
438 video_width = sps.width as u32;
439 video_height = sps.height as u32;
440
441 entry
442 }
443 VideoSequenceHeader::Av1(config) => {
444 compatiable_brands.push(FourCC::Av01);
445 let (entry, seq_obu) = codecs::av1::stsd_entry(config)?;
446
447 video_height = seq_obu.max_frame_height as u32;
448 video_width = seq_obu.max_frame_width as u32;
449
450 let op_point = &seq_obu.operating_points[0];
451
452 video_codec = VideoCodec::Av1 {
453 profile: seq_obu.seq_profile,
454 level: op_point.seq_level_idx,
455 tier: op_point.seq_tier,
456 depth: seq_obu.color_config.bit_depth as u8,
457 monochrome: seq_obu.color_config.mono_chrome,
458 sub_sampling_x: seq_obu.color_config.subsampling_x,
459 sub_sampling_y: seq_obu.color_config.subsampling_y,
460 color_primaries: seq_obu.color_config.color_primaries,
461 transfer_characteristics: seq_obu.color_config.transfer_characteristics,
462 matrix_coefficients: seq_obu.color_config.matrix_coefficients,
463 full_range_flag: seq_obu.color_config.full_color_range,
464 };
465
466 entry
467 }
468 VideoSequenceHeader::Hevc(config) => {
469 compatiable_brands.push(FourCC::Hev1);
470 video_codec = VideoCodec::Hevc {
471 constraint_indicator: config.general_constraint_indicator_flags,
472 level: config.general_level_idc,
473 profile: config.general_profile_idc,
474 profile_compatibility: config.general_profile_compatibility_flags,
475 tier: config.general_tier_flag,
476 general_profile_space: config.general_profile_space,
477 };
478
479 let (entry, sps) = codecs::hevc::stsd_entry(config)?;
480 if sps.frame_rate != 0.0 {
481 video_fps = sps.frame_rate;
482 }
483
484 video_width = sps.width as u32;
485 video_height = sps.height as u32;
486
487 entry
488 }
489 };
490
491 let audio_stsd_entry = match audio_sequence_header.data {
492 AudioSequenceHeaderData::Aac(data) => {
493 compatiable_brands.push(FourCC::Mp41);
494 let (entry, config) =
495 codecs::aac::stsd_entry(audio_sequence_header.sound_size, audio_sequence_header.sound_type, data)?;
496
497 audio_sample_rate = config.sampling_frequency;
498
499 audio_codec = AudioCodec::Aac {
500 object_type: config.audio_object_type,
501 };
502 audio_channels = match audio_sequence_header.sound_type {
503 SoundType::Mono => 1,
504 SoundType::Stereo => 2,
505 _ => return Err(TransmuxError::InvalidAudioChannels),
506 };
507
508 entry
509 }
510 };
511
512 if video_fps == 0.0 {
513 return Err(TransmuxError::InvalidVideoFrameRate);
514 }
515
516 if video_width == 0 || video_height == 0 {
517 return Err(TransmuxError::InvalidVideoDimensions);
518 }
519
520 if audio_sample_rate == 0 {
521 return Err(TransmuxError::InvalidAudioSampleRate);
522 }
523
524 let video_timescale = (1000.0 * video_fps) as u32;
530
531 Ftyp::new(FourCC::Iso5, 512, compatiable_brands).mux(writer)?;
532 Moov::new(
533 Mvhd::new(0, 0, 1000, 0, 1),
534 vec![
535 Trak::new(
536 Tkhd::new(0, 0, 1, 0, Some((video_width, video_height))),
537 None,
538 Mdia::new(
539 Mdhd::new(0, 0, video_timescale, 0),
540 Hdlr::new(HandlerType::Vide, "VideoHandler".to_string()),
541 Minf::new(
542 Stbl::new(
543 Stsd::new(vec![video_stsd_entry]),
544 Stts::new(vec![]),
545 Stsc::new(vec![]),
546 Stco::new(vec![]),
547 Some(Stsz::new(0, vec![])),
548 ),
549 Some(Vmhd::new()),
550 None,
551 ),
552 ),
553 ),
554 Trak::new(
555 Tkhd::new(0, 0, 2, 0, None),
556 None,
557 Mdia::new(
558 Mdhd::new(0, 0, audio_sample_rate, 0),
559 Hdlr::new(HandlerType::Soun, "SoundHandler".to_string()),
560 Minf::new(
561 Stbl::new(
562 Stsd::new(vec![audio_stsd_entry]),
563 Stts::new(vec![]),
564 Stsc::new(vec![]),
565 Stco::new(vec![]),
566 Some(Stsz::new(0, vec![])),
567 ),
568 None,
569 Some(Smhd::new()),
570 ),
571 ),
572 ),
573 ],
574 Some(Mvex::new(vec![Trex::new(1), Trex::new(2)], None)),
575 )
576 .mux(writer)?;
577
578 Ok(Some((
579 VideoSettings {
580 width: video_width,
581 height: video_height,
582 framerate: video_fps,
583 codec: video_codec,
584 bitrate: estimated_video_bitrate,
585 timescale: video_timescale,
586 },
587 AudioSettings {
588 codec: audio_codec,
589 sample_rate: audio_sample_rate,
590 channels: audio_channels,
591 bitrate: estimated_audio_bitrate,
592 timescale: audio_sample_rate,
593 },
594 )))
595 }
596}
597
598#[cfg(test)]
599mod tests;