Getting Started with Alize for Speaker Recognition (Part 3): I-vector

A complete Alize i-vector recipe, covering data preparation, feature extraction, training, and testing. The pipeline ends with the score file res/scores_PLDA_lengthnorm.txt; the meaning of its columns is the same as in the GMM-UBM tutorial, for example:

M S0002 1 BAC009S0002W0122 0.644295
M S0003 1 BAC009S0002W0122 0.520998
M S0004 1 BAC009S0002W0122 0.48462
M S0002 1 BAC009S0002W0123 0.65574
M S0003 1 BAC009S0002W0123 0.722522
M S0004 1 BAC009S0002W0123 0.434874
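For quick inspection, the score file can be read with a few lines of Python. This is only a sketch: it assumes the columns are, as in the GMM-UBM tutorial, a gender flag, the enrolled speaker model, a flag, the test segment name, and the PLDA score, and it simply reports the best-scoring model for each test segment.

# minimal sketch for inspecting res/scores_PLDA_lengthnorm.txt
# assumed column layout: <gender> <model> <flag> <test segment> <score>
from collections import defaultdict

best = defaultdict(lambda: (None, float('-inf')))
with open('res/scores_PLDA_lengthnorm.txt') as f:
    for line in f:
        parts = line.split()
        if len(parts) < 5:
            continue
        model, segment, score = parts[1], parts[3], float(parts[4])
        if score > best[segment][1]:
            best[segment] = (model, score)

for segment, (model, score) in sorted(best.items()):
    print('%s -> %s (%.6f)' % (segment, model, score))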

To train on your own data, just add or remove folders under data/ following the existing layout. To change the GMM/i-vector/PLDA parameters, edit the corresponding files under cfg/.

The data/ directory is organized as follows: one subfolder per speaker under data/ubm, data/train, and data/test, with extracted features written to data/prm and VAD labels to data/lbl.
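A minimal sketch that creates this layout (folder names inferred from the paths used by the scripts below; adjust to your own data):

# create the folders the recipe expects (inferred from the scripts)
import os

for d in ['data/ubm', 'data/train', 'data/test',   # wav files, one subfolder per speaker
          'data/prm', 'data/lbl',                  # features and VAD label files
          'lst', 'ndx', 'res']:                    # list files, ndx files, score output
    os.makedirs(d, exist_ok=True)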

1. Feature extraction (01_extraction_feature.py)

Extract the acoustic features and generate the corresponding .lst files.

import os

def gen_lst(src_fpath, lst_fpath):
    '''Generate a .lst file listing the basenames of all .wav files.
    src_fpath: wav directory, e.g. './data/ubm'
    lst_fpath: output list, e.g. './lst/UBM.lst'
    '''
    flist = open(lst_fpath, 'w')

    spks = os.listdir(src_fpath)
    spks.sort()
    for spk in spks:
        spk_path = os.path.join(src_fpath, spk)
        if not os.path.isdir(spk_path):
            continue
        spk_files = os.listdir(spk_path)
        spk_files.sort()
        for file in spk_files:
            if file[-4:] != '.wav':
                print('%s is not a .wav file' % (file))
                continue
            # one basename (without extension) per line
            flist.write(file[0:-4] + '\n')

    flist.close()

def extraction_feature(src_path):
    '''Extract features with SPro's sfbcep for every .wav file.
    src_path: wav directory, e.g. './data/ubm'
    '''
    spks = os.listdir(src_path)
    spks.sort()
    for spk in spks:
        spk_path = os.path.join(src_path, spk)
        if not os.path.isdir(spk_path):
            continue

        spk_files = os.listdir(spk_path)
        spk_files.sort()
        for file in spk_files:
            if file[-4:] != '.wav':
                print('%s is not a .wav file' % (file))
                continue
            # sfbcep options are kept exactly as in the original recipe;
            # features are written to ./data/prm/<basename>.tmp.prm
            COMMAND_LINE = '%s%s%s%s%s' % ('bin\\sfbcep.exe -m -k 0.97 -p19 -n 24 -r 22 -e -D -A -F wave ',
                            os.path.join(spk_path, file), ' ./data/prm/', file[:-4], '.tmp.prm')

            os.system(COMMAND_LINE)
            print('%s' % (COMMAND_LINE))

if __name__ == "__main__":
    src_ubm = './data/ubm'
    lst_ubm = './lst/UBM.lst'
    gen_lst(src_ubm, lst_ubm)

    src_test = './data/test'
    lst_test = './lst/test.lst'
    gen_lst(src_test, lst_test)

    src_train = './data/train'
    lst_train = './lst/train.lst'
    gen_lst(src_train, lst_train)

    # extract features for all three sets
    extraction_feature(src_ubm)
    extraction_feature(src_test)
    extraction_feature(src_train)
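For reference, the command printed by the script for a hypothetical file data/ubm/S0002/S0002_001.wav looks roughly like this (options taken verbatim from the script; paths may use backslashes on Windows):

bin\sfbcep.exe -m -k 0.97 -p19 -n 24 -r 22 -e -D -A -F wave ./data/ubm/S0002/S0002_001.wav ./data/prm/S0002_001.tmp.prm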

2. Data and feature processing (02_RUN_spro_front-end.py)

Energy-based voice activity detection (VAD) and feature normalization.

    # 1. energy normalization (prepares the energy coefficient for the detector)
    CMD_NORM_E="bin\\NormFeat.exe --config cfg/NormFeat_energy_SPro.cfg --inputFeatureFilename data/data.lst --featureFilesPath data/prm/"
    os.system(CMD_NORM_E)

    # 2. energy-based speech activity detection, writing label files to data/lbl/
    CMD_ENERGY="bin\\EnergyDetector.exe --config cfg/EnergyDetector_SPro.cfg --inputFeatureFilename data/data.lst --featureFilesPath data/prm/ --labelFilesPath data/lbl/"
    os.system(CMD_ENERGY)

    # 3. feature normalization over the speech frames selected above
    CMD_NORM="bin\\NormFeat.exe --config cfg/NormFeat_SPro.cfg --inputFeatureFilename data/data.lst --featureFilesPath data/prm/ --labelFilesPath data/lbl/"
    os.system(CMD_NORM)
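All three commands read data/data.lst, a single list covering every feature file; the download package ships it, but if you rebuild the lists yourself, one way to recreate it is sketched below. This helper is not part of the original scripts and assumes data.lst is simply the concatenation of the per-set lists from step 1.

# hypothetical helper: build data/data.lst from the step-1 lists
# (assumes it is just the union of the UBM/train/test basenames)
with open('data/data.lst', 'w') as out:
    for lst in ['lst/UBM.lst', 'lst/train.lst', 'lst/test.lst']:
        with open(lst) as f:
            out.write(f.read())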
 

3. Generating the ndx configuration files (03_gen_ndx.py)

import os

def gen_ndx(src_fpath, lst_fpath):
    '''Generate a model-training .ndx file:
    one line per speaker, starting with the speaker ID followed by the
    basenames of all of that speaker's segments.
    e.g. lst_fpath = 'ndx/trainModel.ndx'
    '''
    flist = open(lst_fpath, 'w')

    spks = os.listdir(src_fpath)
    spks.sort()
    for spk in spks:
        flist.write(spk + ' ')
        spk_path = os.path.join(src_fpath, spk)
        spk_files = os.listdir(spk_path)
        spk_files.sort()
        for file in spk_files:
            if file[-4:] != '.wav':
                print('%s is not a .wav file' % (file))
                continue
            flist.write(file[0:-4] + ' ')
        flist.write('\n')
    flist.close()

def gen_ivndx(src_fpath, lst_fpath):
    '''Generate an i-vector extraction .ndx file:
    one line per segment, "<basename> <basename>", i.e. one i-vector
    is extracted per feature file.
    '''
    flist = open(lst_fpath, 'w')

    spks = os.listdir(src_fpath)
    spks.sort()
    for spk in spks:
        spk_path = os.path.join(src_fpath, spk)
        spk_files = os.listdir(spk_path)
        spk_files.sort()
        for file in spk_files:
            if file[-4:] != '.wav':
                print('%s is not a .wav file' % (file))
                continue
            flist.write(file[0:-4] + ' ' + file[0:-4] + '\n')

    flist.close()

def gen_pladndx(src_fpath, lst_fpath):
    '''Generate a PLDA / ivNorm .ndx file:
    one line per speaker listing the basenames of all of that
    speaker's segments (no leading speaker ID).
    '''
    flist = open(lst_fpath, 'w')
    spks = os.listdir(src_fpath)
    spks.sort()
    for spk in spks:
        spk_path = os.path.join(src_fpath, spk)
        spk_files = os.listdir(spk_path)
        spk_files.sort()
        for file in spk_files:
            if file[-4:] != '.wav':
                print('%s is not a .wav file' % (file))
                continue
            flist.write(file[0:-4] + ' ')
        flist.write('\n')
    flist.close()

if __name__ == "__main__":

    # i-vector extraction lists for the ubm/train/test sets,
    # then concatenated into a single ivExtractor.ndx
    src_train = './data/train'
    lst_train = 'ndx/ivExtractor_train.ndx'
    gen_ivndx(src_train, lst_train)

    src_test = './data/test'
    lst_test = 'ndx/ivExtractor_test.ndx'
    gen_ivndx(src_test, lst_test)

    gen_ivndx('data/ubm', 'ndx/ivExtractor_ubm.ndx')

    filelist = ['ndx/ivExtractor_ubm.ndx', 'ndx/ivExtractor_train.ndx', 'ndx/ivExtractor_test.ndx']
    newfile = open('ndx/ivExtractor.ndx', 'w')
    for item in filelist:
        for txt in open(item, 'r'):
            newfile.write(txt)
    newfile.close()

    # PLDA training and i-vector normalization lists,
    # built from the background (UBM) set
    src_plda = './data/ubm'
    lst_plda = 'ndx/Plda.ndx'
    gen_pladndx(src_plda, lst_plda)

    src_plda = './data/ubm'
    lst_plda = 'ndx/ivNorm.ndx'
    gen_pladndx(src_plda, lst_plda)

    # trainModel.ndx: enrollment list, one line per target speaker
    src_train = './data/train'
    lst_train = 'ndx/trainModel.ndx'
    gen_ndx(src_train, lst_train)

    # ivTest_plda_target-seg.ndx: one line per test segment,
    # followed by every enrolled speaker model to score it against
    src_test = './data/test'
    ndx_fpath = 'ndx/ivTest_plda_target-seg.ndx'
    flist = open(ndx_fpath, 'w')
    spks_train = os.listdir(src_train)
    spks_train.sort()
    spks = os.listdir(src_test)
    spks.sort()
    for spk in spks:
        spk_path = os.path.join(src_test, spk)
        spk_files = os.listdir(spk_path)
        spk_files.sort()
        for file in spk_files:
            if file[-4:] != '.wav':
                print('%s is not a .wav file' % (file))
                continue
            flist.write(file[0:-4] + ' ')
            for spk_train in spks_train:
                flist.write(spk_train + ' ')
            flist.write('\n')

    flist.close()
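Based on the logic above, the generated files have roughly the following shapes (segment names marked <seg...> are placeholders; the concrete IDs are borrowed from the score example at the top):

ndx/trainModel.ndx               one line per enrolled speaker:        S0002 <seg1> <seg2> ...
ndx/ivExtractor.ndx              one line per segment:                 BAC009S0002W0122 BAC009S0002W0122
ndx/Plda.ndx, ndx/ivNorm.ndx     one line per background speaker:      <seg1> <seg2> ...
ndx/ivTest_plda_target-seg.ndx   one test segment + all models:        BAC009S0002W0122 S0002 S0003 S0004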

4. Extracting i-vectors and PLDA scoring (04_RUN_i-vector_and_plda_test.py)

    # 1. UBM training
    print("Train Universal Background Model by EM algorithm")
    CMD_LINE = "bin\\TrainWorld.exe --config cfg/TrainWorld.cfg"
    os.system(CMD_LINE)

    # 2. Total Variability matrix estimation
    print("Train TotalVariability matrix")
    CMD_LINE = "bin\\TotalVariability.exe --config cfg/TotalVariability.cfg"
    os.system(CMD_LINE)

    # 3. i-vector extraction
    print("Extract i-vectors")
    CMD_LINE = "bin\\IvExtractor.exe --config cfg/ivExtractor.cfg"
    os.system(CMD_LINE)

    # 4. i-vector normalization
    print("Normalize i-vectors")
    CMD_LINE = "bin\\IvNorm.exe --config cfg/ivNorm.cfg"
    os.system(CMD_LINE)

    # 5. PLDA training
    print("Train Probabilistic Linear Discriminant Analysis model")
    CMD_LINE = "bin\\PLDA.exe --config cfg/Plda.cfg"
    os.system(CMD_LINE)

    # 6. PLDA testing
    print("Compare models to test segments using PLDA native scoring")
    CMD_LINE = "bin\\IvTest.exe --config cfg/ivTest_Plda.cfg"
    os.system(CMD_LINE)

 

Full code download: https://download.csdn.net/download/u012594175/11100607

Speaker recognition discussion QQ group: 875705987
