python与vba技术特点分析_VBA处理数据与Python Pandas处理数据案例比较分析

最新推荐文章于 2020-11-24 06:49:45 发布

weixin_39725118

最新推荐文章于 2020-11-24 06:49:45 发布

阅读量145

点赞数

文章标签： python与vba技术特点分析

本文对比了使用Python的Pandas库和Excel VBA处理CSV文件中去空行、去重、改列名及添加新列的需求。Pandas处理方式简洁高效，而VBA则涉及数组、字典等数据结构，代码复杂，调试难度相对较高。两者最终输出结果相同，但在处理大量数据时，Pandas更具优势。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

需求：

现有一个 csv文件，包含'CNUM'和'COMPANY'两列，数据里包含空行，且有内容重复的行数据。

要求：

1）去掉空行；

2）重复行数据只保留一行有效数据；

3）修改'COMPANY'列的名称为'Company_New‘；

4）并在其后增加六列，分别为'C_col',‘D_col',‘E_col',‘F_col',‘G_col',‘H_col'。

一，使用 Python Pandas来处理：

import pandas as pd

import numpy as np

from pandasimport DataFrame,Series

def deal_with_data(filepath,newpath):

file_obj=open(filepath)

df=pd.read_csv(file_obj)# 读取csv文件，创建 DataFrame

df=df.reindex(columns=['CNUM','COMPANY','C_col','D_col','E_col','F_col','G_col','H_col'],fill_value=None)# 重新指定列索引

df.rename(columns={'COMPANY':'Company_New'}, inplace= True)# 修改列名

df=df.dropna(axis=0,how='all')# 去除 NAN 即文件中的空行

df['CNUM']= df['CNUM'].astype('int32')# 将 CNUM 列的数据类型指定为 int32

df= df.drop_duplicates(subset=['CNUM','Company_New'], keep='first')# 去除重复行

df.to_csv(newpath,index=False,encoding='GBK')

file_obj.close()

if __name__=='__main__':

file_path=r'C:UsersP78DesktoppythonCNUM_COMPANY.csv'

file_save_path=r'C:UsersP78DesktoppythonCNUM_COMPANY_OUTPUT.csv'

deal_with_data(file_path,file_save_path)

二，使用 VBA来处理：

100

101

102

103

104

Option Base1

Option Explicit

Sub main()

On Error GoTo error_handling

Dim wb As Workbook

Dim wb_out As Workbook

Dim sht As Worksheet

Dim sht_out As Worksheet

Dim rng AsRange

Dim usedrows As Byte

Dim usedrows_out As Byte

Dim dict_cnum_company AsObject

Dim str_file_path As String

Dim str_new_file_path As String

'assign values to variables:

str_file_path= "C:UsersP78DesktopPythonCNUM_COMPANY.csv"

str_new_file_path= "C:UsersP78DesktopPythonCNUM_COMPANY_OUTPUT.csv"

Set wb= checkAndAttachWorkbook(str_file_path)

Set sht= wb.Worksheets("CNUM_COMPANY")

Set wb_out= Workbooks.Add

wb_out.SaveAs str_new_file_path, xlCSV 'create a csvfile

Set sht_out= wb_out.Worksheets("CNUM_COMPANY_OUTPUT")

Set dict_cnum_company= CreateObject("Scripting.Dictionary")

usedrows= WorksheetFunction.Max(getLastValidRow(sht,"A"), getLastValidRow(sht,"B"))

'rename the header 'COMPANY' to 'Company_New',remove blank & duplicate lines/rows.

Dim cnum_company As String

cnum_company= ""

For Each rng In sht.Range("A1","A" & usedrows)

If VBA.Trim(rng.Offset(0,1).Value)= "COMPANY" Then

rng.Offset(0,1).Value= "Company_New"

End If

cnum_company= rng.Value &"-" & rng.Offset(0,1).Value

If VBA.Trim(cnum_company) <>"-" And Not dict_cnum_company.Exists(rng.Value &"-" & rng.Offset(0,1).Value) Then

dict_cnum_company.Add rng.Value &"-" & rng.Offset(0,1).Value, ""

End If

Next rng

'loop the keys of dict split the keyes by '-' into cnum arrayand company array.

Dim index_dict As Byte

Dim arr_cnum()

Dim arr_Company()

For index_dict= 0 To UBound(dict_cnum_company.keys)

ReDim Preserve arr_cnum(1 To UBound(dict_cnum_company.keys)+ 1)

ReDim Preserve arr_Company(1 To UBound(dict_cnum_company.keys)+ 1)

arr_cnum(index_dict+ 1)= Split(dict_cnum_company.keys()(index_dict),"-")(0)

arr_Company(index_dict+ 1)= Split(dict_cnum_company.keys()(index_dict),"-")(1)

Debug.Print index_dict

'assigns the value of the arrays to the celles.

sht_out.Range("A1","A" & UBound(arr_cnum))= Application.WorksheetFunction.Transpose(arr_cnum)

sht_out.Range("B1","B" & UBound(arr_Company))= Application.WorksheetFunction.Transpose(arr_Company)

'add6 columns to output csvfile:

Dim arr_columns() As Variant

arr_columns= Array("C_col","D_col","E_col","F_col","G_col","H_col") '

sht_out.Range("C1:H1")= arr_columns

Call checkAndCloseWorkbook(str_file_path,False)

Call checkAndCloseWorkbook(str_new_file_path,True)

Exit Sub

error_handling:

Call checkAndCloseWorkbook(str_file_path,False)

Call checkAndCloseWorkbook(str_new_file_path,False)

End Sub

' 辅助函数：

'Get last row of Column Nin a Worksheet

Function getLastValidRow(in_ws As Worksheet, in_col As String)

getLastValidRow= in_ws.Cells(in_ws.Rows.count, in_col).End(xlUp).Row

End Function

Function checkAndAttachWorkbook(in_wb_path As String) As Workbook

Dim wb As Workbook

Dim mywb As String

mywb= in_wb_path

For Each wb In Workbooks

If LCase(wb.FullName)= LCase(mywb) Then

Set checkAndAttachWorkbook= wb

Exit Function

End If

Set wb= Workbooks.Open(in_wb_path, UpdateLinks:=0)

Set checkAndAttachWorkbook= wb

End Function

Function checkAndCloseWorkbook(in_wb_path As String, in_saved As Boolean)

Dim wb As Workbook

Dim mywb As String

mywb= in_wb_path

For Each wb In Workbooks

If LCase(wb.FullName)= LCase(mywb) Then

wb.Close savechanges:=in_saved

Exit Function

End If

End Function

三，输出结果：

两种方法输出结果相同：

四，比较总结：

Python pandas 内置了大量处理数据的方法，我们不需要重复造轮子，用起来很方便，代码简洁的多。

Excel VBA 处理这个需求，使用了数组，字典等数据结构（实际需求中，数据量往往很大，所以一些地方没有直接使用遍历单元格的方法），以及处理字符串，数组和字典的很多方法，对文件的操作也很复杂，一旦出错，调试起来比python也较困难，代码已经尽量优化，但还是远比 Python要多。

到此这篇关于VBA处理数据与Python Pandas处理数据案例比较分析的文章就介绍到这了,更多相关VBA与Python Pandas处理数据内容请搜索服务器之家以前的文章或继续浏览下面的相关文章希望大家以后多多支持服务器之家！

原文链接：https://blog.csdn.net/qq_24937551/article/details/105338086