I am trying to mux H264 encoded data and G711 PCM data into mov multimedia container. I am creating AVPacket from encoded data and initially the PTS and DTS value of video/audio frames is equivalent to AV_NOPTS_VALUE. So I calculated the DTS using current time information. My code -
bool AudioVideoRecorder::WriteVideo(const unsigned char *pData, size_t iDataSize, bool const bIFrame) { ..................................... ..................................... ..................................... AVPacket pkt = {0}; av_init_packet(&pkt); int64_t dts = av_gettime(); dts = av_rescale_q(dts, (AVRational){1, 1000000}, m_pVideoStream->time_base); int duration = 90000 / VIDEO_FRAME_RATE; if(m_prevVideoDts > 0LL) { duration = dts - m_prevVideoDts; } m_prevVideoDts = dts; pkt.pts = AV_NOPTS_VALUE; pkt.dts = m_currVideoDts; m_currVideoDts += duration; pkt.duration = duration; if(bIFrame) { pkt.flags |= AV_PKT_FLAG_KEY; } pkt.stream_index = m_pVideoStream->index; pkt.data = (uint8_t*) pData; pkt.size = iDataSize; int ret = av_interleaved_write_frame(m_pFormatCtx, &pkt); if(ret < 0) { LogErr("Writing video frame failed."); return false; } Log("Writing video frame done."); av_free_packet(&pkt); return true; } bool AudioVideoRecorder::WriteAudio(const unsigned char *pEncodedData, size_t iDataSize) { ................................. ................................. ................................. AVPacket pkt = {0}; av_init_packet(&pkt); int64_t dts = av_gettime(); dts = av_rescale_q(dts, (AVRational){1, 1000000}, (AVRational){1, 90000}); int duration = AUDIO_STREAM_DURATION; // 20 if(m_prevAudioDts > 0LL) { duration = dts - m_prevAudioDts; } m_prevAudioDts = dts; pkt.pts = AV_NOPTS_VALUE; pkt.dts = m_currAudioDts; m_currAudioDts += duration; pkt.duration = duration; pkt.stream_index = m_pAudioStream->index; pkt.flags |= AV_PKT_FLAG_KEY; pkt.data = (uint8_t*) pEncodedData; pkt.size = iDataSize; int ret = av_interleaved_write_frame(m_pFormatCtx, &pkt); if(ret < 0) { LogErr("Writing audio frame failed: %d", ret); return false; } Log("Writing audio frame done."); av_free_packet(&pkt); return true; } And I added stream like this -
AVStream* AudioVideoRecorder::AddMediaStream(enum AVCodecID codecID) { ................................ ................................. pStream = avformat_new_stream(m_pFormatCtx, codec); if (!pStream) { LogErr("Could not allocate stream."); return NULL; } pStream->id = m_pFormatCtx->nb_streams - 1; pCodecCtx = pStream->codec; pCodecCtx->codec_id = codecID; switch(codec->type) { case AVMEDIA_TYPE_VIDEO: pCodecCtx->bit_rate = VIDEO_BIT_RATE; pCodecCtx->width = PICTURE_WIDTH; pCodecCtx->height = PICTURE_HEIGHT; pStream->time_base = (AVRational){1, 90000}; pStream->avg_frame_rate = (AVRational){90000, 1}; pStream->r_frame_rate = (AVRational){90000, 1}; // though the frame rate is variable and around 15 fps pCodecCtx->pix_fmt = STREAM_PIX_FMT; m_pVideoStream = pStream; break; case AVMEDIA_TYPE_AUDIO: pCodecCtx->sample_fmt = AV_SAMPLE_FMT_S16; pCodecCtx->bit_rate = AUDIO_BIT_RATE; pCodecCtx->sample_rate = AUDIO_SAMPLE_RATE; pCodecCtx->channels = 1; m_pAudioStream = pStream; break; default: break; } /* Some formats want stream headers to be separate. */ if (m_pOutputFmt->flags & AVFMT_GLOBALHEADER) m_pFormatCtx->flags |= CODEC_FLAG_GLOBAL_HEADER; return pStream; } There are several problems with this calculation:
The video is laggy and lags behind than audio increasingly with time.
Suppose, an audio frame is received (
WriteAudio(..)) a little late, say by 3 seconds, then the late frame should start playing with a 3-second delay, but it doesn't. The delayed frame is played consecutively with the previous frame. Sometimes I recorded for ~40 seconds but the file duration is more like 2 minutes, yet audio/video is played for only a few moments, like 40 seconds, and the rest of the file contains nothing; the seek bar jumps to the end immediately after 40 seconds (tested in VLC).
EDIT:
According to Ronald S. Bultje's suggestion, what I've understood:
m_pAudioStream->time_base = (AVRational){1, 9000}; // actually no need to set as 9000 is already default value for audio as you said m_pVideoStream->time_base = (AVRational){1, 9000}; should be set, so that both audio and video streams are now in the same time-base units.
And for video:
................... ................... int64_t dts = av_gettime(); // get current time in microseconds dts *= 9000; dts /= 1000000; // 1 second = 10^6 microseconds pkt.pts = AV_NOPTS_VALUE; // is it okay? pkt.dts = dts; // and no need to set pkt.duration, right? And for audio: (exactly same as video, right?)
................... ................... int64_t dts = av_gettime(); // get current time in microseconds dts *= 9000; dts /= 1000000; // 1 second = 10^6 microseconds pkt.pts = AV_NOPTS_VALUE; // is it okay? pkt.dts = dts; // and no need to set pkt.duration, right? And I think they are now sharing the same currDts, right? Please correct me if I am wrong anywhere or missing anything.
Also, if I want to use video stream time base as (AVRational){1, frameRate} and audio stream time base as (AVRational){1, sampleRate}, how the correct code should look like?
EDIT 2.0:
m_pAudioStream->time_base = (AVRational){1, VIDEO_FRAME_RATE}; m_pVideoStream->time_base = (AVRational){1, VIDEO_FRAME_RATE}; And
bool AudioVideoRecorder::WriteAudio(const unsigned char *pEncodedData, size_t iDataSize) { ........................... ...................... AVPacket pkt = {0}; av_init_packet(&pkt); int64_t dts = av_gettime() / 1000; // convert into millisecond dts = dts * VIDEO_FRAME_RATE; if(m_dtsOffset < 0) { m_dtsOffset = dts; } pkt.pts = AV_NOPTS_VALUE; pkt.dts = (dts - m_dtsOffset); pkt.stream_index = m_pAudioStream->index; pkt.flags |= AV_PKT_FLAG_KEY; pkt.data = (uint8_t*) pEncodedData; pkt.size = iDataSize; int ret = av_interleaved_write_frame(m_pFormatCtx, &pkt); if(ret < 0) { LogErr("Writing audio frame failed: %d", ret); return false; } Log("Writing audio frame done."); av_free_packet(&pkt); return true; } bool AudioVideoRecorder::WriteVideo(const unsigned char *pData, size_t iDataSize, bool const bIFrame) { ........................................ ................................. AVPacket pkt = {0}; av_init_packet(&pkt); int64_t dts = av_gettime() / 1000; dts = dts * VIDEO_FRAME_RATE; if(m_dtsOffset < 0) { m_dtsOffset = dts; } pkt.pts = AV_NOPTS_VALUE; pkt.dts = (dts - m_dtsOffset); if(bIFrame) { pkt.flags |= AV_PKT_FLAG_KEY; } pkt.stream_index = m_pVideoStream->index; pkt.data = (uint8_t*) pData; pkt.size = iDataSize; int ret = av_interleaved_write_frame(m_pFormatCtx, &pkt); if(ret < 0) { LogErr("Writing video frame failed."); return false; } Log("Writing video frame done."); av_free_packet(&pkt); return true; } Is the last change okay? The video and audio seem synced. The only problem is that the audio is played without the delay, regardless of whether the packet arrived late. Like -
packet arrival: 1 2 3 4... (then next frame arrived after 3 sec) .. 5
audio played: 1 2 3 4 (no delay) 5
EDIT 3.0:
zeroed audio sample data:
AVFrame* pSilentData; pSilentData = av_frame_alloc(); memset(&pSilentData->data[0], 0, iDataSize); pkt.data = (uint8_t*) pSilentData; pkt.size = iDataSize; av_freep(&pSilentData->data[0]); av_frame_free(&pSilentData); Is this okay? But after writing this into the file container, there is a "dot dot" noise during playback of the media. What's the problem?
EDIT 4.0:
Well, for µ-law audio the zero value is represented as 0xff. So -
memset(&pSilentData->data[0], 0xff, iDataSize); solves my problem.