使用Python处理CSV文件的一些代码示例
笔记:使用Python处理CSV文件的一些代码示例,来自于《Python数据分析基础》一书,有删改
# Read and write a CSV file using only basic Python (no csv module).
# 20181110 wangml
#!/usr/bin/env python3
# Raw strings keep the Windows backslashes literal instead of relying on
# unrecognised escape sequences such as "\w" or "\D".
input_file = r'D:\wangm\Documents\learning\code\python\supplier_data.csv'
output_file = r'D:\wangm\Documents\learning\code\python\supplier_data_out.csv'
# Open input_file for reading and output_file for writing; a file opened in
# 'w' mode is created when it does not exist yet.
with open(input_file, 'r', newline='') as filereader, \
        open(output_file, 'w', newline='') as filewriter:
    # Read the first line of the file (the header row).
    header = filereader.readline()
    header = header.strip()
    header_list = header.split(',')
    print(header_list)
    filewriter.write(','.join(map(str, header_list)) + '\n')
    # Copy every remaining line: split it on commas, echo it, write it back.
    for row in filereader:
        row = row.strip()
        row_list = row.split(',')
        print(row_list)
        filewriter.write(','.join(map(str, row_list)) + '\n')
# Read and write a CSV file with the csv module.
# 20181112 wangml
# csv_pandas_1
#!/usr/bin/env python3
# Import the csv library.
import csv
# Raw strings keep the Windows backslashes literal.
input_file = r'D:\wangm\Documents\learning\code\python\supplier_data.csv'
output_file = r'D:\wangm\Documents\learning\code\python\supplier_data_out.csv'
with open(input_file, 'r', newline='') as csv_in_file, \
        open(output_file, 'w', newline='') as csv_out_file:
    # csv.reader() and csv.writer() create a reader object and a writer
    # object. delimiter sets the field separator; the default is a comma.
    filereader = csv.reader(csv_in_file, delimiter=',')
    filewriter = csv.writer(csv_out_file, delimiter=',')
    # The first row is the header; copy it through unchanged.
    header = next(filereader)
    filewriter.writerow(header)
    # Read one row at a time from the reader, print it, and pass it to the
    # writer.
    for row_list in filereader:
        print(row_list)
        filewriter.writerow(row_list)
# Keep only the rows that meet a condition.
# A csv reader can be iterated only once, so the files are opened again here
# rather than reusing a reader that an earlier loop has already consumed.
# Opening output_file in 'w' mode replaces its previous contents with the
# header plus the matching rows.
with open(input_file, 'r', newline='') as csv_in_file, \
        open(output_file, 'w', newline='') as csv_out_file:
    filereader = csv.reader(csv_in_file, delimiter=',')
    filewriter = csv.writer(csv_out_file, delimiter=',')
    # Copy the header row through unchanged.
    filewriter.writerow(next(filereader))
    for row_list in filereader:
        name = str(row_list[0]).strip()
        # Drop the currency symbol and thousands separators so that float()
        # can parse the value.
        cost = str(row_list[3]).strip('$').replace(',', '')
        # Select rows whose name is 'z' or whose cost exceeds 600; float()
        # converts cost from str to float for the comparison.
        if name == 'z' or float(cost) > 600.0:
            filewriter.writerow(row_list)
# csv_pandas_1
#!/usr/bin/env python3
import pandas as pd
# Raw strings keep the Windows backslashes literal.
input_file = r'D:\wangm\Documents\learning\code\python\supplier_data.csv'
output_file = r'D:\wangm\Documents\learning\code\python\supplier_data_out.csv'
# pd.read_csv() reads a CSV file and builds a DataFrame from it.
data_frame = pd.read_csv(input_file)
# Build a numeric copy of the Cost column that is used only for the
# comparison. The original column is left untouched, so the '$' prefix is
# still present in the rows written to the output file.
cost_as_float = (
    data_frame['Cost']
    .str.strip('$')
    .str.replace(',', '', regex=False)
    .astype(float)
)
# Select, by column name, the rows whose Name contains 'Z' or whose cost
# exceeds 600.
data_frame_value_meets_condition = data_frame.loc[
    data_frame['Name'].str.contains('Z') | (cost_as_float > 600.0), :]
# Write data_frame_value_meets_condition to the output file.
data_frame_value_meets_condition.to_csv(output_file, index=False)
# 20181113
# csv_pandas_2
#!/usr/bin/env python3
# Import the csv library.
import csv
# Raw strings keep the Windows backslashes literal.
input_file = r'D:\wangm\Documents\learning\code\python\supplier_data.csv'
output_file = r'D:\wangm\Documents\learning\code\python\supplier_data_out.csv'
important_dates = ['1/1/2018', '2/1/2018']
with open(input_file, 'r', newline='') as csv_in_file, \
        open(output_file, 'w', newline='') as csv_out_file:
    filereader = csv.reader(csv_in_file)
    filewriter = csv.writer(csv_out_file)
    # Copy the header row through unchanged.
    header = next(filereader)
    filewriter.writerow(header)
    for row_list in filereader:
        a_date = row_list[4]
        # Keep the rows whose date value is listed in important_dates.
        if a_date in important_dates:
            filewriter.writerow(row_list)
# csv_pandas_2
#!/usr/bin/env python3
import pandas as pd
# Raw strings keep the Windows backslashes literal.
input_file = r'D:\wangm\Documents\learning\code\python\supplier_data.csv'
output_file = r'D:\wangm\Documents\learning\code\python\supplier_data_out.csv'
# pd.read_csv() reads a CSV file and builds a DataFrame from it.
data_frame = pd.read_csv(input_file)
important_dates = ['1/1/2018', '2/1/2018']
# Keep the rows whose Date value is listed in important_dates.
data_frame_value_set = data_frame.loc[
    data_frame['Date'].isin(important_dates), :]
data_frame_value_set.to_csv(output_file, index=False)
# 20181113
# csv_pandas_3
#!/usr/bin/env python3
# Import the csv library and the regular-expression library.
import csv
import re
# Raw strings keep the Windows backslashes literal.
input_file = r'D:\wangm\Documents\learning\code\python\supplier_data.csv'
output_file = r'D:\wangm\Documents\learning\code\python\supplier_data_out.csv'
# re.compile(pattern) creates a compiled regular-expression object.
# (?P<my_pattern_group>...) captures the matched text in a group named
# my_pattern_group.
# The pattern matches strings that start with '001-' followed by any
# characters.
# re.I makes the match case-insensitive.
pattern = re.compile(r'(?P<my_pattern_group>^001-.*)', re.I)
with open(input_file, 'r', newline='') as csv_in_file, \
        open(output_file, 'w', newline='') as csv_out_file:
    filereader = csv.reader(csv_in_file)
    filewriter = csv.writer(csv_out_file)
    # Copy the header row through unchanged.
    header = next(filereader)
    filewriter.writerow(header)
    for row_list in filereader:
        id_number = row_list[1]
        # Keep the rows whose ID matches the pattern.
        if pattern.search(id_number):
            filewriter.writerow(row_list)
# 20181113
# csv_pandas_3
#!/usr/bin/env python3
import pandas as pd
# Raw strings keep the Windows backslashes literal.
input_file = r'D:\wangm\Documents\learning\code\python\supplier_data.csv'
output_file = r'D:\wangm\Documents\learning\code\python\supplier_data_out.csv'
# pd.read_csv() reads a CSV file and builds a DataFrame from it.
data_frame = pd.read_csv(input_file)
# Keep the rows whose ID value starts with '001-'.
data_frame_value_matches_pattern = data_frame.loc[
    data_frame['ID'].str.startswith("001-"), :]
data_frame_value_matches_pattern.to_csv(output_file, index=False)
# Select the columns of a CSV file that meet a condition.
# 20181113
# csv_pandas_4
# Select specific columns by their index values.
# When only the column names are known, the matching index values can be
# derived first and then used for the selection: check whether each item of
# the header row is one of the known names and, if so, record its index.
#!/usr/bin/env python3
import csv
# Raw strings keep the Windows backslashes literal.
input_file = r'D:\wangm\Documents\learning\code\python\supplier_data.csv'
output_file = r'D:\wangm\Documents\learning\code\python\supplier_data_out.csv'
my_columns = [0, 3]
with open(input_file, 'r', newline='') as csv_in_file, \
        open(output_file, 'w', newline='') as csv_out_file:
    filereader = csv.reader(csv_in_file)
    filewriter = csv.writer(csv_out_file)
    # The header row is not treated specially: every row, header included,
    # is reduced to the selected columns.
    for row_list in filereader:
        # The values written to the output file for this row.
        row_list_output = [row_list[index_value] for index_value in my_columns]
        filewriter.writerow(row_list_output)
# Select the columns of a CSV file that meet a condition.
# 20181113
# csv_pandas_4
# Select specific columns by their index values.
# When only the column names are known there is no need to process the
# header row as in basic Python: pandas can use column names as an index.
#!/usr/bin/env python3
import pandas as pd
# Raw strings keep the Windows backslashes literal.
input_file = r'D:\wangm\Documents\learning\code\python\supplier_data.csv'
output_file = r'D:\wangm\Documents\learning\code\python\supplier_data_out.csv'
# pd.read_csv() reads a CSV file and builds a DataFrame from it.
data_frame = pd.read_csv(input_file)
# Select all rows of the columns at index positions 0 and 3.
# iloc[rows, columns] selects by integer position; to select by column name
# use label-based indexing instead: data_frame.loc[:, ['Name', 'Cost']]
data_frame_value_column_by_value = data_frame.iloc[:, [0, 3]]
data_frame_value_column_by_value.to_csv(output_file, index=False)
# 给一个CSV文件添加标题行:在基础Python中,可以通过csv库的writerow()函数将标题行写入
# 而pandas库提供了更加简单的方法
# title = ['One', 'Two', ...]
# data_frame = pd.read_csv(input_file, header=None, names=title)
# Read multiple CSV files and print how many CSV files were found.
#!/usr/bin/env python3
import csv
import glob
import os
# Raw string keeps the Windows backslashes literal.
input_path = r'D:\wangm\Documents\learning\code\python'
file_counter = 0
for input_file in glob.glob(os.path.join(input_path, '*.csv')):
    file_counter += 1
    # Template for per-file processing (left disabled, as in the original):
    # row_counter = 1
    # with open(input_file, 'r', newline='') as csv_input_file:
    #     filereader = csv.reader(csv_input_file)
    #     ...
print(file_counter)
# 20181114
# Merge multiple CSV files.
#!/usr/bin/env python3
import pandas as pd
import os
import glob
# Raw strings keep the Windows backslashes literal.
input_path = r'D:\wangm\Documents\learning\code\python'
output_file = r'D:\wangm\Documents\learning\code\python\supplier_data_out.csv'
# Author's note: the pattern 'supplier_data_副本*' failed with
# "OSError: Initializing from file failed" because the file names contained
# Chinese characters; renaming the files to the ASCII pattern below fixed it.
# sorted() makes the stacking order deterministic; glob returns matches in
# arbitrary order.
all_files = sorted(glob.glob(os.path.join(input_path, 'supplier_data_copy*')))
all_data_frame = []
for file in all_files:
    data_frame = pd.read_csv(file, index_col=None)
    all_data_frame.append(data_frame)
# pd.concat() stacks the DataFrames vertically with axis=0; use axis=1 to
# join them side by side instead.
data_frame_concat = pd.concat(all_data_frame, axis=0, ignore_index=True)
data_frame_concat.to_csv(output_file, index=False)
# Compute the sum, the mean, etc. of one field for each of several CSV files.
# In basic Python one would read the files, pull out the values one by one
# and then do the arithmetic; this version uses what pandas provides.
#!/usr/bin/env python3
import pandas as pd
import os
import glob
# Raw strings keep the Windows backslashes literal.
input_path = r'D:\wangm\Documents\learning\code\python'
output_file = r'D:\wangm\Documents\learning\code\python\supplier_data_out.csv'
all_files = glob.glob(os.path.join(input_path, 'supplier_data_copy*'))
all_data_frame = []
for file in all_files:
    data_frame = pd.read_csv(file, index_col=None)
    # Parse the Cost column once: drop the currency symbol and thousands
    # separators, then convert to float.
    cost = (
        data_frame.loc[:, 'Cost']
        .astype(str)
        .str.strip('$')
        .str.replace(',', '', regex=False)
        .astype(float)
    )
    # One summary row per input file: its name, the sum and the mean.
    data = {'file_name': [os.path.basename(file)],
            'total_cost': [cost.sum()],
            'average_cost': [cost.mean()]}
    all_data_frame.append(
        pd.DataFrame(data, columns=['file_name', 'total_cost', 'average_cost']))
# Stack the per-file summary rows and write them out.
data_frames_concat = pd.concat(all_data_frame, axis=0, ignore_index=True)
data_frames_concat.to_csv(output_file, index=False)
代码⽰例中使⽤的CSV⽂件:
上述代码分别使用csv库、pandas库来对CSV文件进行相同的操作。上述代码运行在Python 3.6版本下,在Win10、Spyder中。
有关Python的csv库的详细介绍:

版权声明:本站内容均来自互联网,仅供演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系QQ:729038198,我们将在24小时内删除。