IPython
Excel
按照resolved_name对数据进行合并,以sporophytic为例
"""Merge the values of one column by ``resolved_name``.

Rows that share the same ``resolved_name`` are collapsed into one entry that
lists every value found for that name (comma-separated cells are split into
individual entries).  Two CSV files are written to the working directory:

* ``raw_data.csv`` - every entry per name, duplicates included;
* ``data.csv``     - the distinct entries per name.
"""
import pandas as pd
from pandas import DataFrame, Series

# Input file used by the original analysis; pass another path to ``main``
# to process a different file.
path = 'C:\\Users\\jyjh\\Desktop\\data_prosessed\\1.csv'


def addNumbers(dictionary_name, key, data):
    """Append *data* to the list stored under *key*, creating the list if needed."""
    dictionary_name.setdefault(key, []).append(data)


def split_values(value):
    """Return the individual entries held in a single cell as a list.

    A string cell is split on commas and each piece is stripped of
    surrounding whitespace.  Any other cell (for example the float NaN of an
    empty cell, or a number) is kept as one entry.
    """
    if isinstance(value, str):
        return [piece.strip() for piece in value.split(',')]
    return [value]


def merge_by_name(names, values):
    """Group the entries of *values* by the matching item of *names*.

    *names* and *values* are iterated in lockstep.  The result maps each name
    (in order of first appearance) to the list of its entries in row order.
    """
    merged = {}
    for name, value in zip(names, values):
        for entry in split_values(value):
            addNumbers(merged, name, entry)
    return merged


def count_it(count):
    """Return the distinct items of *count* as a list, in first-seen order.

    A bare float is treated as a single item.  A plain list is returned
    (rather than a dict view) so the result serializes cleanly.
    """
    if isinstance(count, float):
        return [count]
    return list(dict.fromkeys(count))


def main(csv_path=path, key_column='resolved_name', value_column='sporophytic'):
    """Read *csv_path*, merge *value_column* by *key_column* and save the results.

    Writes ``raw_data.csv`` and ``data.csv`` and returns the two Series that
    were saved, in that order.
    """
    frame_1 = pd.read_csv(csv_path)

    # One pass over the rows; names that occur once and names that occur on
    # several rows go through exactly the same splitting logic.
    new_data = merge_by_name(frame_1[key_column], frame_1[value_column])

    # Save every entry per name.
    series_2 = Series(new_data)
    series_2.to_csv('raw_data.csv')

    # Save the distinct entries per name.  Each value is wrapped in a
    # one-element list by addNumbers, which keeps the nesting of the saved
    # cells unchanged.
    container = {}
    for name, entries in new_data.items():
        addNumbers(container, name, count_it(entries))
    container_to_csv = Series(container)
    container_to_csv.to_csv('data.csv')

    return series_2, container_to_csv


if __name__ == '__main__':
    main()
结果如图,数据合并并且获得简化数据
运行前必须先安装相关程序:IPython、pandas 以及用于查看结果的 Excel
处理其他数据时,需要相应修改代码中的文件路径和列名(例如把 sporophytic 换成要合并的列)