2010-06-03 3 views
1

'패킷 손실 은폐'를 사용하여 오디오 스트림에서 손실 된 PCM 프레임을 숨길 수 있습니다. 불행히도, 모든 라이센스 제한 및 코드 확장없이 액세스 할 수있는 라이브러리를 찾을 수 없습니다 (... 몇 가지 제안을 위해).PCM 스트림에서 패킷 손실을 숨기기

나는 PLC를 구현하는 Asterisk 프로젝트를 위해 Steve Underwood가 작성한 GPL 코드를 배치했다. 몇 가지 제한 사항이 있습니다. Steve가 그의 코드에서 제안한 것처럼, 그의 알고리즘은 약간의 작업으로 다른 스트림에 적용될 수 있습니다. 현재이 코드는 8kHz 16 비트 부호있는 모노 스트림과 함께 작동합니다.

코드의 변형은 Google Code Search의 간단한 검색을 통해 찾을 수 있습니다.

다른 스트림에서 작동하도록 코드를 조정할 수 있기를 바랍니다. 초기 목표는 8 + kHz, 16 비트 부호있는 멀티 채널 오디오 (모두 C++ 환경에서) 알고리즘을 조정하는 것입니다. 궁극적으로 GPL 라이센스에 따라 다른 사람들에게 이익이 될 수 있기를 희망합니다.

내 노력으로 아래 코드가 첨부되어 있습니다. 이 코드에는 주어진 확률로 여러 프레임을 "떨어 뜨릴"주요 기능이 포함되어 있습니다. 불행하게도이 코드는 예상대로 작동하지 않습니다. gdb에서 실행할 때 EXC_BAD_ACCESS를 받고 있지만 'bt'명령을 사용하면 gdb에서 추적을 얻지 못합니다. 분명히, 나는 메모리에 trampimg 일부 확실하지만 정확히 어디 있습니다. 나는 amdf_pitch 기능을 주석 처리하면 코드

int main (int argc, char *argv[]) 
{ 
std::ifstream fin("C:\\cc32kHz.pcm"); 

if(!fin.is_open()) 
{ 
    std::cout << "Failed to open input file" << std::endl; 
    return 1; 
} 

std::ofstream fout_repaired("C:\\cc32kHz_repaired.pcm"); 

if(!fout_repaired.is_open()) 
{ 
    std::cout << "Failed to open output repaired file" << std::endl; 
    return 1; 
} 

std::ofstream fout_lossy("C:\\cc32kHz_lossy.pcm"); 

if(!fout_lossy.is_open()) 
{ 
    std::cout << "Failed to open output repaired file" << std::endl; 
    return 1; 
} 

audio::PcmConcealer Concealer; 
Concealer.Init(1, 16, 32000); 

//Generate random numbers; 
srand(time(NULL)); 

int value = 0; 
int probability = 5; 

while(!fin.eof()) 
{ 
    char arr[2]; 
    fin.read(arr, 2); 

    //Generate's random number; 
    value = rand() % 100 + 1; 

    if(value <= probability) 
    { 
    char blank[2] = {0x00, 0x00}; 

    fout_lossy.write(blank, 2); 

    //Fill in data; 
    Concealer.Fill((int16_t *)blank, 1); 
    fout_repaired.write(blank, 2); 
    } 
    else 
    { 
    //Write data to file; 
    fout_repaired.write(arr, 2); 
    fout_lossy.write(arr, 2); 

    Concealer.Receive((int16_t *)arr, 1); 
    } 
} 

fin.close(); 
fout_repaired.close(); 
fout_lossy.close(); 

return 0; 
} 

... 충돌없이

/* 
* Code adapted from Steve Underwood of the Asterisk Project. This code inherits 
* the same licensing restrictions as the Asterisk Project. 
*/ 

#include "audio/PcmConcealer.hpp" 

/* We do a straight line fade to zero volume in 50ms when we are filling in for missing data. */ 
#define ATTENUATION_INCREMENT  0.0025        /* Attenuation per sample */ 


#if !defined(INT16_MAX) 
#define INT16_MAX  (32767) 
#define INT16_MIN  (-32767-1) 
#endif 


#ifdef WIN32 
inline double rint(double x) 
{ 
    return floor(x + 0.5); 
} 
#endif 

inline short fsaturate(double damp) 
{ 
    if (damp > 32767.0) 
     return INT16_MAX; 

    if (damp < -32768.0) 
     return INT16_MIN; 

return (short)rint(damp); 
} 

namespace audio 
{ 

PcmConcealer::PcmConcealer() : Initialized(false) 
{ 


} 

PcmConcealer::~PcmConcealer() 
{ 
Destroy(); 
} 

void PcmConcealer::Init(int channels, int bit_depth, int sample_rate) 
{ 
if(Initialized) 
    return; 

if(channels <= 0 || bit_depth != 16) 
    return; 

Initialized = true; 

channel_count = channels; 
this->sample_rate = sample_rate; 

////////////// 

double min = PLC_PITCH_MIN(sample_rate); 
int imin = (int)min; 

double max = PLC_PITCH_MAX(sample_rate); 
int imax = (int)max; 

plc_pitch_min = imin; 
plc_pitch_max = imax; 
plc_pitch_overlap_max = (plc_pitch_min >> 2); 
correlation_span = CORRELATION_SPAN(sample_rate); 
plc_history_len = correlation_span + plc_pitch_min; 

////////////// 

for(int i = 0; i < channel_count; i ++) 
{ 
    plc_state_t *t = new plc_state_t; 
    memset(t, 0, sizeof(plc_state_t)); 

    t->pitchbuf = new float[plc_pitch_min]; 
    t->history = new short[plc_history_len]; 

    ChannelStates.push_back(t); 
} 
} 

void PcmConcealer::Destroy() 
{ 
if(!Initialized) 
    return; 

while(ChannelStates.size()) 
{ 
    plc_state_t *s = ChannelStates.at(0); 

    if(s) 
    { 
    if(s->history) delete s->history; 
    if(s->pitchbuf) delete s->pitchbuf; 

    memset(s, 0, sizeof(plc_state_t)); 
    delete s; 
    } 

    ChannelStates.erase(ChannelStates.begin()); 
} 

ChannelStates.clear(); 

Initialized = false; 
} 

//Process a block of received audio samples. 
int PcmConcealer::Receive(short amp[], int frames) 
{ 
if(!Initialized) 
    return 0; 

int j = 0; 

for(int k = 0; k < ChannelStates.size(); k++) 
{ 
    int i; 
    int overlap_len; 
    int pitch_overlap; 

    float old_step; 
    float new_step; 
    float old_weight; 
    float new_weight; 
    float gain; 

    plc_state_t *s = ChannelStates.at(k); 

    if (s->missing_samples) 
    { 
    /* Although we have a real signal, we need to smooth it to fit well 
    with the synthetic signal we used for the previous block */ 

    /* The start of the real data is overlapped with the next 1/4 cycle 
    of the synthetic data. */ 
    pitch_overlap = s->pitch >> 2; 

    if (pitch_overlap > frames) 
    pitch_overlap = frames; 

    gain = 1.0 - s->missing_samples * ATTENUATION_INCREMENT; 

    if (gain < 0.0) 
    gain = 0.0; 

    new_step = 1.0/pitch_overlap; 
    old_step = new_step*gain; 
    new_weight = new_step; 
    old_weight = (1.0 - new_step)*gain; 

    for (i = 0; i < pitch_overlap; i++) 
    { 
    int index = (i * channel_count) + j; 

    amp[index] = fsaturate(old_weight * s->pitchbuf[s->pitch_offset] + new_weight * amp[index]); 

    if (++s->pitch_offset >= s->pitch) 
    s->pitch_offset = 0; 

    new_weight += new_step; 
    old_weight -= old_step; 

    if (old_weight < 0.0) 
    old_weight = 0.0; 
    } 

    s->missing_samples = 0; 
    } 

    save_history(s, amp, j, frames); 

    j++; 
} 

    return frames; 
} 

//Fill-in a block of missing audio samples. 
int PcmConcealer::Fill(short amp[], int frames) 
{ 
if(!Initialized) 
    return 0; 

int j =0; 

for(int k = 0; k < ChannelStates.size(); k++) 
{ 
    short *tmp = new short[plc_pitch_overlap_max]; 

    int i; 
    int pitch_overlap; 

    float old_step; 
    float new_step; 
    float old_weight; 
    float new_weight; 
    float gain; 

    short *orig_amp; 
    int orig_len; 

    orig_amp = amp; 
    orig_len = frames; 

    plc_state_t *s = ChannelStates.at(k); 

    if (s->missing_samples == 0) 
    { 
    // As the gap in real speech starts we need to assess the last known pitch, 
    //and prepare the synthetic data we will use for fill-in 
    normalise_history(s); 
    s->pitch = amdf_pitch(plc_pitch_min, plc_pitch_max, s->history + plc_history_len - correlation_span - plc_pitch_min, j, correlation_span); 

    // We overlap a 1/4 wavelength 
    pitch_overlap = s->pitch >> 2; 

    // Cook up a single cycle of pitch, using a single of the real signal with 1/4 
    //cycle OLA'ed to make the ends join up nicely 
    // The first 3/4 of the cycle is a simple copy 
    for (i = 0; i < s->pitch - pitch_overlap; i++) 
    s->pitchbuf[i] = s->history[plc_history_len - s->pitch + i]; 

    // The last 1/4 of the cycle is overlapped with the end of the previous cycle 
    new_step = 1.0/pitch_overlap; 
    new_weight = new_step; 

    for ( ; i < s->pitch; i++) 
    { 
    s->pitchbuf[i] = s->history[plc_history_len - s->pitch + i]*(1.0 - new_weight) + s->history[plc_history_len - 2*s->pitch + i]*new_weight; 
    new_weight += new_step; 
    } 

    // We should now be ready to fill in the gap with repeated, decaying cycles 
    // of what is in pitchbuf 

    // We need to OLA the first 1/4 wavelength of the synthetic data, to smooth 
    // it into the previous real data. To avoid the need to introduce a delay 
    // in the stream, reverse the last 1/4 wavelength, and OLA with that. 

    gain = 1.0; 
    new_step = 1.0/pitch_overlap; 
    old_step = new_step; 
    new_weight = new_step; 
    old_weight = 1.0 - new_step; 

    for (i = 0; i < pitch_overlap; i++) 
    { 
    int index = (i * channel_count) + j; 

    amp[index] = fsaturate(old_weight * s->history[plc_history_len - 1 - i] + new_weight * s->pitchbuf[i]); 
    new_weight += new_step; 
    old_weight -= old_step; 

    if (old_weight < 0.0) 
    old_weight = 0.0; 
    } 

    s->pitch_offset = i; 
    } 
    else 
    { 
    gain = 1.0 - s->missing_samples*ATTENUATION_INCREMENT; 
    i = 0; 
    } 

    for ( ; gain > 0.0 && i < frames; i++) 
    { 
    int index = (i * channel_count) + j; 

    amp[index] = s->pitchbuf[s->pitch_offset]*gain; 
    gain -= ATTENUATION_INCREMENT; 

    if (++s->pitch_offset >= s->pitch) 
    s->pitch_offset = 0; 
    } 

    for ( ; i < frames; i++) 
    { 
    int index = (i * channel_count) + j; 
    amp[i] = 0; 
    } 

    s->missing_samples += orig_len; 
    save_history(s, amp, j, frames); 

    delete [] tmp; 

    j++; 
    } 

return frames; 
} 

void PcmConcealer::save_history(plc_state_t *s, short *buf, int channel_index, int frames) 
{ 
    if (frames >= plc_history_len) 
    { 
     /* Just keep the last part of the new data, starting at the beginning of the buffer */ 
     //memcpy(s->history, buf + len - plc_history_len, sizeof(short)*plc_history_len); 

    int frames_to_copy = plc_history_len; 

    for(int i = 0; i < frames_to_copy; i ++) 
    { 
    int index = (channel_count * (i + frames - plc_history_len)) + channel_index; 
    s->history[i] = buf[index]; 
    } 

     s->buf_ptr = 0; 
     return; 
    } 

if (s->buf_ptr + frames > plc_history_len) 
    { 
     /* Wraps around - must break into two sections */ 
     //memcpy(s->history + s->buf_ptr, buf, sizeof(short)*(plc_history_len - s->buf_ptr)); 

    short *hist_ptr = s->history + s->buf_ptr; 
    int frames_to_copy = plc_history_len - s->buf_ptr; 

    for(int i = 0; i < frames_to_copy; i ++) 
    { 
    int index = (channel_count * i) + channel_index; 
    hist_ptr[i] = buf[index]; 
    } 

     frames -= (plc_history_len - s->buf_ptr); 


     //memcpy(s->history, buf + (plc_history_len - s->buf_ptr), sizeof(short)*len); 

    frames_to_copy = frames; 

    for(int i = 0; i < frames_to_copy; i ++) 
    { 
    int index = (channel_count * (i + (plc_history_len - s->buf_ptr))) + channel_index; 
    s->history[i] = buf[index]; 
    } 

     s->buf_ptr = frames; 
     return; 
    } 

    /* Can use just one section */ 
    //memcpy(s->history + s->buf_ptr, buf, sizeof(short)*len); 

short *hist_ptr = s->history + s->buf_ptr; 
int frames_to_copy = frames; 

for(int i = 0; i < frames_to_copy; i ++) 
{ 
    int index = (channel_count * i) + channel_index; 
    hist_ptr[i] = buf[index]; 
} 

s->buf_ptr += frames; 
} 

void PcmConcealer::normalise_history(plc_state_t *s) 
{ 
    short *tmp = new short[plc_history_len]; 

    if (s->buf_ptr == 0) 
     return; 

    memcpy(tmp, s->history, sizeof(short)*s->buf_ptr); 
    memcpy(s->history, s->history + s->buf_ptr, sizeof(short)*(plc_history_len - s->buf_ptr)); 
    memcpy(s->history + plc_history_len - s->buf_ptr, tmp, sizeof(short)*s->buf_ptr); 

    s->buf_ptr = 0; 

delete [] tmp; 
} 

int PcmConcealer::amdf_pitch(int min_pitch, int max_pitch, short amp[], int channel_index, int frames) 
{ 
    int i; 
    int j; 
    int acc; 
    int min_acc; 
    int pitch; 

    pitch = min_pitch; 
    min_acc = INT_MAX; 

    for (i = max_pitch; i <= min_pitch; i++) 
    { 
     acc = 0; 

    for (j = 0; j < frames; j++) 
    { 
    int index1 = (channel_count * (i+j)) + channel_index; 
    int index2 = (channel_count * j) + channel_index; 

    //std::cout << "Index 1: " << index1 << ", Index 2: " << index2 << std::endl; 

      acc += abs(amp[index1] - amp[index2]); 
     } 

    if (acc < min_acc) 
     { 
      min_acc = acc; 
      pitch = i; 
     } 
    } 

std::cout << "Pitch: " << pitch << std::endl; 

    return pitch; 
} 



} 

P.S. PcmConcealer.hpp

/* 
* Code adapted from Steve Underwood of the Asterisk Project. This code inherits 
* the same licensing restrictions as the Asterisk Project. 
*/ 


#ifndef __PCMCONCEALER_HPP__ 
#define __PCMCONCEALER_HPP__ 

/** 

1. What does it do? 
The packet loss concealment module provides a suitable synthetic fill-in signal, 
to minimise the audible effect of lost packets in VoIP applications. It is not 
tied to any particular codec, and could be used with almost any codec which does not 
specify its own procedure for packet loss concealment. 

Where a codec specific concealment procedure exists, the algorithm is usually built 
around knowledge of the characteristics of the particular codec. It will, therefore, 
generally give better results for that particular codec than this generic concealer will. 

2. How does it work? 
While good packets are being received, the plc_rx() routine keeps a record of the trailing 
section of the known speech signal. If a packet is missed, plc_fillin() is called to produce 
a synthetic replacement for the real speech signal. The average mean difference function 
(AMDF) is applied to the last known good signal, to determine its effective pitch. 
Based on this, the last pitch period of signal is saved. Essentially, this cycle of speech 
will be repeated over and over until the real speech resumes. However, several refinements 
are needed to obtain smooth pleasant sounding results. 

- The two ends of the stored cycle of speech will not always fit together smoothly. This can 
    cause roughness, or even clicks, at the joins between cycles. To soften this, the 
    1/4 pitch period of real speech preceeding the cycle to be repeated is blended with the last 
    1/4 pitch period of the cycle to be repeated, using an overlap-add (OLA) technique (i.e. 
    in total, the last 5/4 pitch periods of real speech are used). 

- The start of the synthetic speech will not always fit together smoothly with the tail of 
    real speech passed on before the erasure was identified. Ideally, we would like to modify 
    the last 1/4 pitch period of the real speech, to blend it into the synthetic speech. However, 
    it is too late for that. We could have delayed the real speech a little, but that would 
    require more buffer manipulation, and hurt the efficiency of the no-lost-packets case 
    (which we hope is the dominant case). Instead we use a degenerate form of OLA to modify 
    the start of the synthetic data. The last 1/4 pitch period of real speech is time reversed, 
    and OLA is used to blend it with the first 1/4 pitch period of synthetic speech. The result 
    seems quite acceptable. 

- As we progress into the erasure, the chances of the synthetic signal being anything like 
    correct steadily fall. Therefore, the volume of the synthesized signal is made to decay 
    linearly, such that after 50ms of missing audio it is reduced to silence. 

- When real speech resumes, an extra 1/4 pitch period of sythetic speech is blended with the 
    start of the real speech. If the erasure is small, this smoothes the transition. If the erasure 
    is long, and the synthetic signal has faded to zero, the blending softens the start up of the 
    real signal, avoiding a kind of "click" or "pop" effect that might occur with a sudden onset. 

3. How do I use it? 
Before audio is processed, call plc_init() to create an instance of the packet loss 
concealer. For each received audio packet that is acceptable (i.e. not including those being 
dropped for being too late) call plc_rx() to record the content of the packet. Note this may 
modify the packet a little after a period of packet loss, to blend real synthetic data smoothly. 
When a real packet is not available in time, call plc_fillin() to create a sythetic substitute. 
That's it! 

*/ 


/*! Minimum allowed pitch (66 Hz) */ 
#define PLC_PITCH_MIN(SAMPLE_RATE) ((double)(SAMPLE_RATE)/66.6) 

/*! Maximum allowed pitch (200 Hz) */ 
#define PLC_PITCH_MAX(SAMPLE_RATE) ((SAMPLE_RATE)/200) 

/*! Maximum pitch OLA window */ 
//#define PLC_PITCH_OVERLAP_MAX(SAMPLE_RATE) ((PLC_PITCH_MIN(SAMPLE_RATE)) >> 2) 

/*! The length over which the AMDF function looks for similarity (20 ms) */ 
#define CORRELATION_SPAN(SAMPLE_RATE) ((20 * (SAMPLE_RATE))/1000) 

/*! History buffer length. The buffer must also be at leat 1.25 times 
    PLC_PITCH_MIN, but that is much smaller than the buffer needs to be for 
    the pitch assessment. */ 
//#define PLC_HISTORY_LEN(SAMPLE_RATE) ((CORRELATION_SPAN(SAMPLE_RATE)) + (PLC_PITCH_MIN(SAMPLE_RATE))) 


namespace audio 
{ 


typedef struct 
{ 
    /*! Consecutive erased samples */ 
    int missing_samples; 

    /*! Current offset into pitch period */ 
    int pitch_offset; 

/*! Pitch estimate */ 
    int pitch; 

/*! Buffer for a cycle of speech */ 
    float *pitchbuf;//[PLC_PITCH_MIN]; 

/*! History buffer */ 
    short *history;//[PLC_HISTORY_LEN]; 

/*! Current pointer into the history buffer */ 
    int buf_ptr; 
} plc_state_t; 


class PcmConcealer 
{ 
public: 
PcmConcealer(); 

~PcmConcealer(); 

void Init(int channels, int bit_depth, int sample_rate); 

//Process a block of received audio samples. 
int Receive(short amp[], int frames); 

//Fill-in a block of missing audio samples. 
int Fill(short amp[], int frames); 

void Destroy(); 

private: 

int amdf_pitch(int min_pitch, int max_pitch, short amp[], int channel_index, int frames); 
void save_history(plc_state_t *s, short *buf, int channel_index, int frames); 
void normalise_history(plc_state_t *s); 

/** Holds the states of each of the channels **/ 
std::vector< plc_state_t * > ChannelStates; 

int plc_pitch_min; 
int plc_pitch_max; 
int plc_pitch_overlap_max; 
int correlation_span; 
int plc_history_len; 

int channel_count; 
int sample_rate; 

bool Initialized; 
}; 


} 

#endif 

PcmConcealer.cpp를 실행 - 나는 디지털 오디오가 나의 장점이 아니라고 고백해야한다 ...

답변

0

문제가 해결되었습니다. 문제는 amdf_pitch 함수 내에 있습니다. 다른 곳에서도 약간의 버그가있었습니다 (수리되었습니다). 결과적으로 코드는 주어진 확률로 테스트 베드 삽입 공백을 실행합니다.

Audacity를 사용하여 테스트 베드를 통해 생성 된 원시 PCM 스트림을 연구했습니다. 빈 프레임 세트가 발생하면 예상대로 스무딩이 수신에서부터 공백으로 발생합니다. 그러나 공란에서 유효한/수신 된 데이터로 변경하면이 단계에서 스무딩이 작동하지 않는 것으로 보이므로 클릭이 발생합니다. 어떤 제안?

int main (int argc, char *argv[]) 
{ 
    std::ifstream fin("C:\\cc32kHz.pcm", std::ios::binary); 

    if(!fin.is_open()) 
    { 
     std::cout << "Failed to open input file" << std::endl; 
     return 1; 
    } 

    std::ofstream fout_repaired("C:\\cc32kHz_repaired.pcm", std::ios::binary); 

    if(!fout_repaired.is_open()) 
    { 
     std::cout << "Failed to open output repaired file" << std::endl; 
     return 1; 
    } 

    std::ofstream fout_lossy("C:\\cc32kHz_lossy.pcm", std::ios::binary); 

    if(!fout_lossy.is_open()) 
    { 
     std::cout << "Failed to open output repaired file" << std::endl; 
     return 1; 
    } 

    audio::PcmConcealer Concealer; 
    Concealer.Init(1, 16, 32000); //1-channel, 16-bit, 32kHz 

    //Generate random numbers; 
    srand(time(NULL)); 

    int value = 0; 
    int probability = 3; 

    int old_bytes_read = 0; 

    while(!fin.eof()) 
    { 
     char arr[1024]; 
     fin.read(arr, 1024); 

     int total_bytes_read = fin.tellg(); 
     int bytes_read = total_bytes_read - old_bytes_read; 
     old_bytes_read = total_bytes_read; 

     if(!bytes_read) 
      continue; //Probably reached EOF; 

     //Generate's random number; 
     value = rand() % 100 + 1; 

     if(value <= probability) 
     { 
      char blank[1024] = {0x00, 0x00}; 

      fout_lossy.write(blank, 1024); 

      //Fill in data; 
      Concealer.Fill((int16_t *)blank, 512); 
      fout_repaired.write(blank, 1024); 
     } 
     else 
     { 
      //Write data to file; 
      fout_repaired.write(arr, 1024); 
      fout_lossy.write(arr, 1024); 

      Concealer.Receive((int16_t *)arr, 512); 
     } 
    } 

    fin.close(); 
    fout_repaired.close(); 
    fout_lossy.close(); 

    return 0; 
} 

PcmConcealer.hpp

/* 
* PcmConcealer.hpp 
* Code adapted from Steve Underwood of the Asterisk Project. This code inherits 
* the same licensing restrictions as the Asterisk Project. 
*/ 



#ifndef __PCMCONCEALER_HPP__ 
#define __PCMCONCEALER_HPP__ 

/** 

1. What does it do? 
The packet loss concealment module provides a suitable synthetic fill-in signal, 
to minimise the audible effect of lost packets in VoIP applications. It is not 
tied to any particular codec, and could be used with almost any codec which does not 
specify its own procedure for packet loss concealment. 

Where a codec specific concealment procedure exists, the algorithm is usually built 
around knowledge of the characteristics of the particular codec. It will, therefore, 
generally give better results for that particular codec than this generic concealer will. 

2. How does it work? 
While good packets are being received, the plc_rx() routine keeps a record of the trailing 
section of the known speech signal. If a packet is missed, plc_fillin() is called to produce 
a synthetic replacement for the real speech signal. The average mean difference function 
(AMDF) is applied to the last known good signal, to determine its effective pitch. 
Based on this, the last pitch period of signal is saved. Essentially, this cycle of speech 
will be repeated over and over until the real speech resumes. However, several refinements 
are needed to obtain smooth pleasant sounding results. 

- The two ends of the stored cycle of speech will not always fit together smoothly. This can 
    cause roughness, or even clicks, at the joins between cycles. To soften this, the 
    1/4 pitch period of real speech preceeding the cycle to be repeated is blended with the last 
    1/4 pitch period of the cycle to be repeated, using an overlap-add (OLA) technique (i.e. 
    in total, the last 5/4 pitch periods of real speech are used). 

- The start of the synthetic speech will not always fit together smoothly with the tail of 
    real speech passed on before the erasure was identified. Ideally, we would like to modify 
    the last 1/4 pitch period of the real speech, to blend it into the synthetic speech. However, 
    it is too late for that. We could have delayed the real speech a little, but that would 
    require more buffer manipulation, and hurt the efficiency of the no-lost-packets case 
    (which we hope is the dominant case). Instead we use a degenerate form of OLA to modify 
    the start of the synthetic data. The last 1/4 pitch period of real speech is time reversed, 
    and OLA is used to blend it with the first 1/4 pitch period of synthetic speech. The result 
    seems quite acceptable. 

- As we progress into the erasure, the chances of the synthetic signal being anything like 
    correct steadily fall. Therefore, the volume of the synthesized signal is made to decay 
    linearly, such that after 50ms of missing audio it is reduced to silence. 

- When real speech resumes, an extra 1/4 pitch period of sythetic speech is blended with the 
    start of the real speech. If the erasure is small, this smoothes the transition. If the erasure 
    is long, and the synthetic signal has faded to zero, the blending softens the start up of the 
    real signal, avoiding a kind of "click" or "pop" effect that might occur with a sudden onset. 

3. How do I use it? 
Before audio is processed, call plc_init() to create an instance of the packet loss 
concealer. For each received audio packet that is acceptable (i.e. not including those being 
dropped for being too late) call plc_rx() to record the content of the packet. Note this may 
modify the packet a little after a period of packet loss, to blend real synthetic data smoothly. 
When a real packet is not available in time, call plc_fillin() to create a sythetic substitute. 
That's it! 

*/ 


/*! Minimum allowed pitch (66 Hz) */ 
#define PLC_PITCH_MIN(SAMPLE_RATE) ((double)(SAMPLE_RATE)/66.6) 

/*! Maximum allowed pitch (200 Hz) */ 
#define PLC_PITCH_MAX(SAMPLE_RATE) ((SAMPLE_RATE)/200) 

/*! Maximum pitch OLA window */ 
//#define PLC_PITCH_OVERLAP_MAX(SAMPLE_RATE) ((PLC_PITCH_MIN(SAMPLE_RATE)) >> 2) 

/*! The length over which the AMDF function looks for similarity (20 ms) */ 
#define CORRELATION_SPAN(SAMPLE_RATE) ((20 * (SAMPLE_RATE))/1000) 

/*! History buffer length. The buffer must also be at leat 1.25 times 
    PLC_PITCH_MIN, but that is much smaller than the buffer needs to be for 
    the pitch assessment. */ 
//#define PLC_HISTORY_LEN(SAMPLE_RATE) ((CORRELATION_SPAN(SAMPLE_RATE)) + (PLC_PITCH_MIN(SAMPLE_RATE))) 


namespace audio 
{ 


typedef struct 
{ 
    /*! Consecutive erased samples */ 
    int missing_samples; 

    /*! Current offset into pitch period */ 
    int pitch_offset; 

    /*! Pitch estimate */ 
    int pitch; 

    /*! Buffer for a cycle of speech */ 
    float *pitchbuf;//[PLC_PITCH_MIN]; 

    /*! History buffer */ 
    short *history;//[PLC_HISTORY_LEN]; 

    /*! Current pointer into the history buffer */ 
    int buf_ptr; 
} plc_state_t; 


class PcmConcealer 
{ 
public: 
    PcmConcealer(); 

    ~PcmConcealer(); 

    void Init(int channels, int bit_depth, int sample_rate); 

    //Process a block of received audio samples. 
    int Receive(short amp[], int frames); 

    //Fill-in a block of missing audio samples. 
    int Fill(short amp[], int frames); 

    void Destroy(); 

private: 

    inline int amdf_pitch(int min_pitch, int max_pitch, short amp[], int frames); 
    void save_history(plc_state_t *s, short *buf, int channel_index, int frames); 
    void normalise_history(plc_state_t *s); 

    /** Holds the states of each of the channels **/ 
    std::vector< plc_state_t * > ChannelStates; 

    int plc_pitch_min; 
    int plc_pitch_max; 
    int plc_pitch_overlap_max; 
    int correlation_span; 
    int plc_history_len; 

    int channel_count; 
    int sample_rate; 

    bool Initialized; 
}; 


} 

#endif 

PcmConcealer.cpp

/* 
* PcmConcealer.cpp 
* 
* Code adapted from Steve Underwood of the Asterisk Project. This code inherits 
* the same licensing restrictions as the Asterisk Project. 
*/ 

#include "audio/PcmConcealer.hpp" 

/* We do a straight line fade to zero volume in 50ms when we are filling in for missing data. */ 
#define ATTENUATION_INCREMENT  0.0025        /* Attenuation per sample */ 


#ifndef INT16_MAX 
#define INT16_MAX  (32767) 
#endif 

#ifndef INT16_MIN 
#define INT16_MIN  (-32767-1) 
#endif 


#ifdef WIN32 
inline double rint(double x) 
{ 
    return floor(x + 0.5); 
} 
#endif 

inline short fsaturate(double damp) 
{ 
    if (damp > 32767.0) 
     return INT16_MAX; 

    if (damp < -32768.0) 
     return INT16_MIN; 

    return (short)rint(damp); 
} 

namespace audio 
{ 

PcmConcealer::PcmConcealer() : Initialized(false) 
{ 


} 

PcmConcealer::~PcmConcealer() 
{ 
    Destroy(); 
} 

void PcmConcealer::Init(int channels, int bit_depth, int sample_rate) 
{ 
    if(Initialized) 
     return; 

    if(channels <= 0 || bit_depth != 16) 
     return; 

    Initialized = true; 

    channel_count = channels; 
    this->sample_rate = sample_rate; 

    ////////////// 

    double min = PLC_PITCH_MIN(sample_rate); 
    int imin = (int)min; 

    double max = PLC_PITCH_MAX(sample_rate); 
    int imax = (int)max; 

    plc_pitch_min = imin; 
    plc_pitch_max = imax; 
    plc_pitch_overlap_max = (plc_pitch_min >> 2); 
    correlation_span = CORRELATION_SPAN(sample_rate); 
    plc_history_len = correlation_span + plc_pitch_min; 

    ////////////// 

    for(int i = 0; i < channel_count; i ++) 
    { 
     plc_state_t *t = new plc_state_t; 
     memset(t, 0, sizeof(plc_state_t)); 

     t->pitchbuf = new float[plc_pitch_min]; 
     t->history = new short[plc_history_len]; 

     ChannelStates.push_back(t); 
    } 
} 

void PcmConcealer::Destroy() 
{ 
    if(!Initialized) 
     return; 

    while(ChannelStates.size()) 
    { 
     plc_state_t *s = ChannelStates.at(0); 

     if(s) 
     { 
      if(s->history) delete s->history; 
      if(s->pitchbuf) delete s->pitchbuf; 

      memset(s, 0, sizeof(plc_state_t)); 
      delete s; 
     } 

     ChannelStates.erase(ChannelStates.begin()); 
    } 

    ChannelStates.clear(); 

    Initialized = false; 
} 

//Process a block of received audio samples. 
int PcmConcealer::Receive(short amp[], int frames) 
{ 
    if(!Initialized) 
     return 0; 

    int j = 0; 

    for(int k = 0; k < ChannelStates.size(); k++) 
    { 
     int i; 
     int overlap_len; 
     int pitch_overlap; 

     float old_step; 
     float new_step; 
     float old_weight; 
     float new_weight; 
     float gain; 

     plc_state_t *s = ChannelStates.at(k); 

     if (s->missing_samples) 
     { 
      /* Although we have a real signal, we need to smooth it to fit well 
       with the synthetic signal we used for the previous block */ 

      /* The start of the real data is overlapped with the next 1/4 cycle 
       of the synthetic data. */ 
      pitch_overlap = s->pitch >> 2; 


      if (pitch_overlap > frames) 
       pitch_overlap = frames; 

      gain = 1.0 - s->missing_samples * ATTENUATION_INCREMENT; 

      if (gain < 0.0) 
       gain = 0.0; 

      new_step = 1.0/pitch_overlap; 
      old_step = new_step*gain; 
      new_weight = new_step; 
      old_weight = (1.0 - new_step)*gain; 

      for (i = 0; i < pitch_overlap; i++) 
      { 
       int index = (i * channel_count) + j; 

       amp[index] = fsaturate(old_weight * s->pitchbuf[s->pitch_offset] + new_weight * amp[index]); 

       if (++s->pitch_offset >= s->pitch) 
        s->pitch_offset = 0; 

       new_weight += new_step; 
       old_weight -= old_step; 

       if (old_weight < 0.0) 
        old_weight = 0.0; 
      } 

      s->missing_samples = 0; 
     } 

     save_history(s, amp, j, frames); 

     j++; 
    } 

    return frames; 
} 

//Fill-in a block of missing audio samples. 
int PcmConcealer::Fill(short amp[], int frames) 
{ 
    if(!Initialized) 
     return 0; 

    int j =0; 

    for(int k = 0; k < ChannelStates.size(); k++) 
    { 
     short *tmp = new short[plc_pitch_overlap_max]; 

     int i; 
     int pitch_overlap; 

     float old_step; 
     float new_step; 
     float old_weight; 
     float new_weight; 
     float gain; 

     short *orig_amp; 
     int orig_len; 

     orig_amp = amp; 
     orig_len = frames; 

     plc_state_t *s = ChannelStates.at(k); 

     if (s->missing_samples == 0) 
     { 
      // As the gap in real speech starts we need to assess the last known pitch, 
      //and prepare the synthetic data we will use for fill-in 
      normalise_history(s); 
      s->pitch = amdf_pitch(plc_pitch_min, plc_pitch_max, s->history + (plc_history_len - correlation_span - plc_pitch_min), correlation_span); 

      // We overlap a 1/4 wavelength 
      pitch_overlap = s->pitch >> 2; 

      // Cook up a single cycle of pitch, using a single of the real signal with 1/4 
      //cycle OLA'ed to make the ends join up nicely 
      // The first 3/4 of the cycle is a simple copy 
      for (i = 0; i < s->pitch - pitch_overlap; i++) 
       s->pitchbuf[i] = s->history[plc_history_len - s->pitch + i]; 

      // The last 1/4 of the cycle is overlapped with the end of the previous cycle 
      new_step = 1.0/pitch_overlap; 
      new_weight = new_step; 

      for ( ; i < s->pitch; i++) 
      { 
       s->pitchbuf[i] = s->history[plc_history_len - s->pitch + i]*(1.0 - new_weight) + s->history[plc_history_len - 2*s->pitch + i]*new_weight; 
       new_weight += new_step; 
      } 

      // We should now be ready to fill in the gap with repeated, decaying cycles 
      // of what is in pitchbuf 

      // We need to OLA the first 1/4 wavelength of the synthetic data, to smooth 
      // it into the previous real data. To avoid the need to introduce a delay 
      // in the stream, reverse the last 1/4 wavelength, and OLA with that. 

      gain = 1.0; 
      new_step = 1.0/pitch_overlap; 
      old_step = new_step; 
      new_weight = new_step; 
      old_weight = 1.0 - new_step; 

      for (i = 0; (i < pitch_overlap) && (i < frames); i++) 
      { 
       int index = (i * channel_count) + j; 

       amp[index] = fsaturate(old_weight * s->history[plc_history_len - 1 - i] + new_weight * s->pitchbuf[i]); 
       new_weight += new_step; 
       old_weight -= old_step; 

       if (old_weight < 0.0) 
        old_weight = 0.0; 
      } 

      s->pitch_offset = i; 
     } 
     else 
     { 
      gain = 1.0 - s->missing_samples*ATTENUATION_INCREMENT; 
      i = 0; 
     } 

     for ( ; gain > 0.0 && i < frames; i++) 
     { 
      int index = (i * channel_count) + j; 

      amp[index] = s->pitchbuf[s->pitch_offset]*gain; 
      gain -= ATTENUATION_INCREMENT; 

      if (++s->pitch_offset >= s->pitch) 
       s->pitch_offset = 0; 
     } 

     for ( ; i < frames; i++) 
     { 
      int index = (i * channel_count) + j; 
      amp[i] = 0; 
     } 

     s->missing_samples += orig_len; 
     save_history(s, amp, j, frames); 

     delete [] tmp; 

     j++; 
    } 

    return frames; 
} 

void PcmConcealer::save_history(plc_state_t *s, short *buf, int channel_index, int frames) 
{ 
    if (frames >= plc_history_len) 
    { 
     /* Just keep the last part of the new data, starting at the beginning of the buffer */ 
     //memcpy(s->history, buf + len - plc_history_len, sizeof(short)*plc_history_len); 

     int frames_to_copy = plc_history_len; 

     for(int i = 0; i < frames_to_copy; i ++) 
     { 
      int index = (channel_count * (i + frames - plc_history_len)) + channel_index; 
      s->history[i] = buf[index]; 
     } 

     s->buf_ptr = 0; 
     return; 
    } 

    if (s->buf_ptr + frames > plc_history_len) 
    { 
     /* Wraps around - must break into two sections */ 
     //memcpy(s->history + s->buf_ptr, buf, sizeof(short)*(plc_history_len - s->buf_ptr)); 

     short *hist_ptr = s->history + s->buf_ptr; 
     int frames_to_copy = plc_history_len - s->buf_ptr; 

     for(int i = 0; i < frames_to_copy; i ++) 
     { 
      int index = (channel_count * i) + channel_index; 
      hist_ptr[i] = buf[index]; 
     } 

     frames -= (plc_history_len - s->buf_ptr); 


     //memcpy(s->history, buf + (plc_history_len - s->buf_ptr), sizeof(short)*len); 

     frames_to_copy = frames; 

     for(int i = 0; i < frames_to_copy; i ++) 
     { 
      int index = (channel_count * (i + (plc_history_len - s->buf_ptr))) + channel_index; 
      s->history[i] = buf[index]; 
     } 

     s->buf_ptr = frames; 
     return; 
    } 

    /* Can use just one section */ 
    //memcpy(s->history + s->buf_ptr, buf, sizeof(short)*len); 

    short *hist_ptr = s->history + s->buf_ptr; 
    int frames_to_copy = frames; 

    for(int i = 0; i < frames_to_copy; i ++) 
    { 
     int index = (channel_count * i) + channel_index; 
     hist_ptr[i] = buf[index]; 
    } 

    s->buf_ptr += frames; 
} 

void PcmConcealer::normalise_history(plc_state_t *s) 
{ 
    short *tmp = new short[plc_history_len]; 

    if (s->buf_ptr == 0) 
     return; 

    memcpy(tmp, s->history, sizeof(short)*s->buf_ptr); 
    memcpy(s->history, s->history + s->buf_ptr, sizeof(short)*(plc_history_len - s->buf_ptr)); 
    memcpy(s->history + plc_history_len - s->buf_ptr, tmp, sizeof(short)*s->buf_ptr); 

    s->buf_ptr = 0; 

    delete [] tmp; 
} 

int PcmConcealer::amdf_pitch(int min_pitch, int max_pitch, short amp[], int frames) 
{ 
    int i; 
    int j; 
    int acc; 
    int min_acc; 
    int pitch; 

    pitch = min_pitch; 
    min_acc = INT_MAX; 

    for (i = max_pitch; i <= min_pitch; i++) 
    { 
     acc = 0; 

     /*for (j = 0; j < frames; j++) 
     { 
      int index1 = (channel_count * (i+j)) + channel_index; 
      int index2 = (channel_count * j) + channel_index; 

      //std::cout << "Index 1: " << index1 << ", Index 2: " << index2 << std::endl; 

      acc += abs(amp[index1] - amp[index2]); 
     }*/ 

     for (j = 0; j < frames; j++) 
      acc += abs(amp[i + j] - amp[j]); 

     if (acc < min_acc) 
     { 
      min_acc = acc; 
      pitch = i; 
     } 
    } 

    //std::cout << "Pitch: " << pitch << std::endl; 

    return pitch; 
} 



} 
+0

그래서 당신이 어딘가에 코드를 공유 있습니다

나는 업데이트 된 코드를 첨부? 다른 대체 구현을 찾았습니까? 저는 실제로 PCM이 아닌 압축 된 오디오 형식의 PLC를 찾고 있습니다. – abbood

관련 문제