我编写了 C++ 相位声码器来根据 DAFX 书中的 MATLAB 代码更改音高。MATLAB 代码和示例音频文件在这里。我在纯正弦波上进行了测试,MATLAB 代码输出了良好的结果,但 C++ 增加了一些嗡嗡声。我花了几天时间试图理解为什么,我会很感激你的想法。
以下是相关的 C++ 方法。它们用到了其他类,但下面的代码摘录应该足够清晰。对于从文件中读取的每一帧,都会调用第一个方法 pitchShift(FFTWindowLength = 1024)。第二个方法 synthesizeHop 由 pitchShift 在每一跳(hop)时调用。
/**
 * Feeds one frame of channel-0 samples through the phase-vocoder pipeline.
 *
 * Samples are pushed into gInFIFO one at a time while the matching output
 * sample is read from gOutFIFO. Each time the input FIFO holds fftFrameSize
 * samples, the frame is Hann-windowed, fft-shifted, transformed and handed to
 * synthesizeHop(); the input FIFO is then advanced by stepSize samples.
 *
 * @param numSampsToProcess number of samples to consume and to return
 * @param audioBuffer       source audio; only channel 0 is read
 * @return numSampsToProcess samples read from gOutFIFO
 */
vector<double> PitchShiftResampling::pitchShift(long numSampsToProcess, AudioBuffer audioBuffer){
    static double fftProcessData[MAX_FRAME_LENGTH];

    // NOTE(review): gRover is a local, so the FIFO write position restarts at
    // inFifoLatency on every call — confirm callers always pass whole hops.
    long gRover = PitchShiftResampling::inFifoLatency;
    vector<double> output(numSampsToProcess);

    // TODO: Make it work for stereo. For now, work with only one channel
    const vector<double>& indata = audioBuffer.getSampleData(0);

    // Automatic storage instead of new/delete: the transform object is
    // released on every exit path, including an exception thrown mid-loop.
    FFT fft(fftFrameSize, audioBuffer.getNumChannels());

    /* main processing loop */
    for (long i = 0; i < numSampsToProcess; i++) {
        output[i] = gOutFIFO[gRover - inFifoLatency];

        /* As long as we have not yet collected enough data just read in */
        gInFIFO[gRover] = indata[i];
        gRover++;
        if (gRover < fftFrameSize) {
            continue;
        }

        /* now we have enough data for processing */
        gRover = inFifoLatency;

        /* do windowing (Hann) */
        for (long s = 0; s < fftFrameSize; s++) {
            window = 0.5 * (1.0 - cos(2.0 * M_PI * (double)s / (double)(fftFrameSize)));
            fftProcessData[s] = window * gInFIFO[s];
        }

        /* do FFT centering.
         * Only the first fftFrameSize entries hold the current frame, so the
         * shift must cover exactly that length (synthesizeHop() shifts the
         * inverse transform by fftFrameSize as well). Shifting by
         * MAX_FRAME_LENGTH would move the frame out of the transformed range
         * whenever the two sizes differ. */
        Util::fftshift(fftProcessData, fftFrameSize);

        /* do FFT */
        fft.computeFFTFrameData(fftProcessData);
        synthesizeHop(&fft);

        /* move input FIFO: drop the oldest stepSize samples */
        for (long k = 0; k < inFifoLatency; k++) {
            gInFIFO[k] = gInFIFO[k + stepSize];
        }
    }
    return output;
}
/**
 * Processes one analysis hop: per-bin phase update, inverse FFT, windowing,
 * linear-interpolation resampling and overlap-add.
 *
 * Reads phase and magnitude of channel 0 from fft->phaseMagn, writes the
 * re-synthesised spectrum into fft->fftFrameSpectrum->currentChannelData->spectrum,
 * inverts it, and adds the resampled grain (lx samples) to gOutputAccum.
 * The first stepSize accumulator samples are then copied to gOutFIFO and the
 * accumulator is shifted down by stepSize.
 *
 * @param fft transform object holding the analysis of the current frame; its
 *            spectrum and output buffers are overwritten.
 */
void PitchShiftResampling::synthesizeHop(FFT *fft) {
    for (int k = 0; k <= fftFrameSize2; k++) {
        // Nominal phase advance of bin k over one hop.
        const double omega = 2 * M_PI * stepSize * k / fftFrameSize;

        const double currentAnaPhase = fft->phaseMagn->phas[0][k];
        const double deltaPhi = omega + Util::princarg(currentAnaPhase - prevAnaPhase[k] - omega);
        const double currentSynPhase = Util::princarg(prevSynPhase[k] + deltaPhi * pitchShift);
        const double magn = fft->phaseMagn->magn[0][k];
        const double real = magn * cos(currentSynPhase);
        const double imag = magn * sin(currentSynPhase);

        // fftw library uses following format for spectrum data
        // r0, r1, r2, ...., r_(n/2), i_(n+1)/2-1, ...., i2, i1
        // where (r0,0) is f_0, (r1,i1) is f_1, and so on (f_0 does not have
        // imaginary part because the input array is real).
        fft->fftFrameSpectrum->currentChannelData->spectrum[k] = real;
        // Store the imaginary part only for bins that own a slot. When
        // fftFrameSize - k == k (the Nyquist bin of an even-length frame) the
        // mirrored index is the real slot itself, and writing imag there would
        // overwrite the real part stored just above.
        if (k > 0 && fftFrameSize - k > k) {
            fft->fftFrameSpectrum->currentChannelData->spectrum[fftFrameSize - k] = imag;
        }

        prevAnaPhase[k] = currentAnaPhase;
        prevSynPhase[k] = currentSynPhase;
    }

    /* IFFT */
    fft->invertFFT(1);
    Util::fftshift(fft->fftFrameSpectrum->currentChannelData->output, fftFrameSize);

    /* do windowing (Hann) and normalise by the frame length */
    vector<double> grain;
    grain.reserve(fftFrameSize + 1);
    for (int k = 0; k < fftFrameSize; k++) {
        window = 0.5 * (1.0 - cos(2.0 * M_PI * (double)k / (double)(fftFrameSize)));
        grain.push_back(window * fft->fftFrameSpectrum->currentChannelData->output[k] / fftFrameSize);
    }
    // One trailing zero, presumably so that ix1[k] may index fftFrameSize.
    grain.push_back(0);

    /* resample the grain by linear interpolation and add to output accumulator */
    for (int k = 0; k < lx; k++) {
        const double a = grain[ix[k]];
        const double val = a * dx1[k] + grain[ix1[k]] * dx[k];
        gOutputAccum[k] += val;
    }

    for (int k = 0; k < stepSize; k++) {
        gOutFIFO[k] = gOutputAccum[k];
    }

    /* shift accumulator */
    // NOTE(review): the byte count is expressed with sizeof(float); confirm it
    // matches the element type and the length of gOutputAccum.
    memmove(gOutputAccum, gOutputAccum+stepSize, fftFrameSize*2*sizeof(float));
}
我测试了 princarg 和 fftshift 函数,并确认我正确地将值赋给了 fftw 库的频谱数据结构。我还确认重叠相加(overlap-add)的实现是正确的,并且插值部分与 MATLAB 代码等效。
这是生成的正弦波的屏幕截图,使用 C++ 处理,使用 0.8 音高偏移比(输入为 440Hz 正弦波):

正弦波中会周期性地出现毛刺,即使换用不同的音高偏移比也是如此。当音高偏移比降到 1 以下时,C++ 输出中的毛刺会变得更严重,而用 MATLAB 处理的正弦波看起来几乎完美。
非常感谢有关在哪里查找错误的任何提示或想法!
编辑:问题已解决
谢谢大家!我原本怀疑是与重叠相关的问题,结果发现是我的 fftshift 函数有错误,显然我之前测试得不够仔细。移位偏差了一个位置(off-by-one),导致每一跳都出现毛刺。
我换了
// Original, faulty fftshift body: the end iterator &in[n - 1] leaves the last element out of the rotated range.
std::rotate(&in[0], &in[n - n2], &in[n - 1]);
替换为:
for (int i = 0; i < n2; i++)
{
double tmp = in[i];
in[i] = in[i + n2];
in[i + n2] = tmp;
}
其中 n 是数组的长度,n2 = n / 2。这样修改之后,嗡嗡声就消失了!