Pandas 数据处理数据处理,数据清洗详解数据清洗详解
今天小编就为大家分享一篇Pandas 数据处理,数据清洗详解,具有很好的参考价值,希望对大家有所帮助。一
起跟随小编过来看看吧
如下所示:如下所示:
# -*-coding:utf-8-*-
from pandas import DataFrame
import pandas as pd
import numpy as np
"""
获取行列数据
"""
df = DataFrame(np.random.rand(4, 5), columns=['A', 'B', 'C', 'D', 'E'])
print df
print
df['col_sum'] = df.apply(lambda x: x.sum(), axis=1) # 横向求和,axis=1表示横向
df.loc['row_sum'] = df.apply(lambda x: x.sum()) # loc获取一整列的数据,对一列数据进行求和
print df
print
dd = pd.DataFrame(np.arange(0, 60, 2).reshape(10, 3), columns=list('abc'))
# loc获取一整列的数据
print dd
print
print dd.loc[0:len(dd), 'a']
print
print dd.loc[0:3, ['a', 'b']]
print
print dd.loc[[1, 5], ['b', 'c']]
print '--------------------------------------'
# iloc获取某个位置的元素,或者某个区域的元素
print dd.iloc[1, 1]
print dd.iloc[0:3, [0, 1]]
print dd.iloc[[0, 3, 5], 0:2]
print '--------------------------------------'
"""
去重函数 drop_duplicates()
"""
from pandas import Series, DataFrame
data = DataFrame({'k': [1, 1, 2, 2]})
print data
print type(data) # <class 'pandas.core.frame.DataFrame'>
print
isduplicates = data.duplicated() # duplicated()判断是否是重复的项
print isduplicates
print type(isduplicates) # <class 'pandas.core.series.Series'>
print
data = data.drop_duplicates() # drop_duplicates()移除重复的项
print data
print type(data) # <class 'pandas.core.frame.DataFrame'>
print '-------------------------------------------------'
"""
Pandas.DataFrame 读取、合并、修改列数据、新增列、分组、分组数据计算
"""
import numpy as np
import pandas as pd
from pandas import Series, DataFrame
from datetime import timedelta, datetime
from dateutil.parser import parse
"""
读写csv文件
"""
# 读取csv文件
df = pd.read_csv('data_english.csv', encoding='gbk')
# print df
评论0
最新资源