创建虚拟数据
创建文件
读取文件
行，列，单元格
数据区域的读取，填充数字，文字序列
填充日期序列
函数填充，计算列
排序
数据筛选、过滤
柱状图
分组柱图，深度优化
叠加柱状图，水平柱状图
饼图
折线趋势图，叠加区域图
散点图、直方图、密度图、数据相关性
多表联合(从VLOOKUP到JOIN)
数据校验、轴的概念
把一列数据分割成两列
求和、求平均、统计引导
定位、消除重复数据
旋转数据表（行转换）
读取CVS、TSV、TXT文件中的数据
透视表、分组、聚合(GroupBy)
线性回归、数据预测
条件格式化
行操作集锦
列操作集锦
读取数据库
编写复杂方程

创建虚拟数据

faker库

import pandas as pd
from faker import Faker
import random as rd
fake = Faker("zh-CN")
# 要输入到DF中的数据字典
dic = {}
# 姓名
list_name = []
# 随机手机号
list_phone_number = []
# 随机国家
list_country = []
# 随机邮件
list_email = []
# 随机车牌号
list_car_number = []
# 随机年份，格式2022-02-22
list_date = []
# 随机字符串，长度在min-max之间
list_str =[]
# 随机年龄
list_age = []
# 随机金额
list_money = []
# 索引
list_index = []
for i in range(0,1000):
    list_index.append(i)
    list_name.append(fake.name())
    list_phone_number.append(fake.phone_number())
    list_country.append(fake.country())
    list_email.append(fake.email())
    list_car_number.append(fake.license_plate())
    list_date.append(fake.date_time(tzinfo=None, end_datetime=None))
    list_str.append(fake.pystr(min_chars=0, max_chars=8))
    list_age.append(rd.randint(15,65))
    list_money.append(rd.random()*9999)
dic['Index'] = list_index
dic['Name'] = list_name
dic['Age'] = list_age
dic['Phone'] = list_phone_number
dic['Country'] = list_country
dic['Email'] = list_email
dic['CarNumber'] = list_car_number
dic['Date'] = list_date
dic['Note'] = list_str
dic['Money'] = list_money
df = pd.DataFrame(dic)
print(df.columns)
df.to_excel('./TestExcel/AlbertData.xlsx')

创建文件

import pandas as pd
# DataFrame-->Worksheet
# 初始化数据
df = pd.DataFrame({'ID': [1, 2, 3, 4, 5, 6], 'Name': ['Tim', 'Victor', 'Nick', 'Timthony', 'Yangzhongke', 'Zack']})
# 设置ID为索引，而不是默认的ID 0 1 2
df = df.set_index('ID')
print(df.columns)
print(df)
excelPath = './TestExcel/output.xlsx'  # 使用相对路径
df.to_excel(excelPath)
print('Create excel successfully!')

读取文件

import pandas as pd
excelPath = './TestExcel/output.xlsx'  # 使用相对路径
outExcel = pd.read_excel(excelPath, index_col='ID')
# outExcel = pd.read_excel(excelPath, header=1)  # 从第1行开始作为标题
# 如果excel里面没有header，但是我们知道如何操作？下面两行
# 操作将Name列显示为了SubName
outExcel = pd.read_excel(excelPath, header=None)
outExcel.columns = ['ID', 'SubName']
# 将修改好的数据SubName写到新的文件中
outExcel.set_index('ID', inplace=True)
outExcel.to_excel('./TestExcel/output002.xlsx')
print('Done')
print(outExcel.shape)  # 显示出这个表有几行几列
print(outExcel.columns)  # 显示出所有列名称
print(f'我是文件头两行数据:\n{outExcel.head(2)}')  # 查看前2行
print(f'我是文件尾三行数据:\n{outExcel.tail(3)}')  # 后三行

行，列，单元格

import pandas as pd
# dic = {'x': 100, 'y': 200, 'z': 300}
# s1 = pd.Series(dic)
# 下面这种方式是一样的效果
# index是行号 name是列号 [100,200,300]是行内容
# s1 = pd.Series([100, 200, 300], index=['x', 'y', 'z'])
s1 = pd.Series([1, 2, 3], index=[1, 2, 3], name='A')  # 一行数据
s2 = pd.Series([10, 20, 30], index=[1, 2, 3], name='B')
s3 = pd.Series([100, 200, 300], index=[2, 3, 4], name='C')
# 以Dictionary的形式将Series加入到DataFrame中
df = pd.DataFrame({s1.name: s1, s2.name: s2, s3.name: s3})
# df = df.set_index('A')
# 假如Series index是[2,3,4] 没有的则会填充NaN
print(df.head())

数据区域的读取，填充数字，文字序列

import pandas as pd
from datetime import date, timedelta
# 实现月份累加算法,d是初始时间，md是累加的月份
def add_month(d, md):
    yd = md // 12
    m = d.month + md % 12
    if m != 12:
        yd += m // 12
        m = m % 12
    return date(d.year + yd, m, d.day)
# 跳过空行16行，选取E:H列 指定ID列的数据类型为int(先转为string类型，直接转int类型不行)
books = pd.read_excel('./TestExcel/output004.xlsx', skiprows=16, usecols="E:H",
                      dtype={'ID': str, 'InStore': str, 'Date': str})
startTime = date(2021, 12, 25)
for i in books.index:
    books['ID'].at[i] = i + 1
    print(type(books['ID']))  # 此种写法books['ID']是pandas.core.series.Series对象
    books['InStore'].at[i] = 'Yes' if i % 2 == 0 else 'NO'  # 在Python中if else可以组成表达式用
    # books['Date'].at[i] = startTime + timedelta(days=i)  # 天数依次累加
    # books['Date'].at[i] = date(startTime.year+i,startTime.month,startTime.day)  # 年份依次累加
    books['Date'].at[i] = add_month(startTime, i)  # 月份依次累加
# print(books['ID'])
books.to_excel('./TestExcel/output004Test.xlsx')
print(books)

填充日期序列

import pandas as pd
from datetime import date, timedelta
# 实现月份累加算法,d是初始时间，md是累加的月份
def add_month(d, md):
    yd = md // 12
    m = d.month + md % 12
    if m != 12:
        yd += m // 12
        m = m % 12
    return date(d.year + yd, m, d.day)
# 跳过空行16行，选取E:H列 指定ID列的数据类型为int(先转为string类型，直接转int类型不行)
books = pd.read_excel('./TestExcel/output004.xlsx', skiprows=16, usecols="E:H",
                      dtype={'ID': str, 'InStore': str, 'Date': str})
startTime = date(2021, 12, 25)
for i in books.index:
    books['ID'].at[i] = i + 1
    print(type(books['ID']))  # 此种写法books['ID']是pandas.core.series.Series对象
    books['InStore'].at[i] = 'Yes' if i % 2 == 0 else 'NO'  # 在Python中if else可以组成表达式用
    # books['Date'].at[i] = startTime + timedelta(days=i)  # 天数依次累加
    # books['Date'].at[i] = date(startTime.year+i,startTime.month,startTime.day)  # 年份依次累加
    books['Date'].at[i] = add_month(startTime, i)  # 月份依次累加
# print(books['ID'])
books.to_excel('./TestExcel/output004Test.xlsx')
print(books)

函数填充，计算列

import pandas as pd
def add_2(x):
    return x + 2
books = pd.read_excel('./TestExcel/output005.xlsx', dtype={'ID': str, 'Name': str},index_col='ID')
for i in books.index:
    books['FirstPrice'].at[i] = 20 + i;
    books['LastPrice'].at[i] = 30 + i;
# 列计算无需取出每行，直接列运算即可
books['FinalPrice'] = books['FirstPrice'] * books['LastPrice']
# 自己列加2块钱
books['FinalPrice'] += 2
# 另一种方式追加2块钱，函数
books['FinalPrice'] = books['FinalPrice'].apply(add_2)
# 另一种方式追加2块钱，lambda表达式
books['FinalPrice'] = books['FinalPrice'].apply(lambda x: x + 2)
print(books)
books.to_excel('./TestExcel/output005.xlsx')

排序

import pandas as pd
books = pd.read_excel('./TestExcel/output006.xlsx', dtype={'ID': str, 'Name': str}, index_col='ID')
# inplace=True是为了不生产新的DF
# 按照价格排序 ascending默认为True升序排列
# books.sort_values(by='FinalPrice', inplace=True, ascending=False)
# 先按照Worthly排序，再按照FinalPrice排序
books.sort_values(by=['Worthly', 'FinalPrice'], inplace=True, ascending=[True,False])
print(books)

数据筛选、过滤

import pandas as pd
def lastPrice_32_to_42(a):
    return a >= 32 and a < 42
def finalPrice_657_to_2000(a):
    return 657 <= a < 2000
books = pd.read_excel('./TestExcel/output007.xlsx', index_col='ID')
# 这边loc定位到某列，按照apply里面的函数关系来进行筛选
# books = books.loc[books['LastPrice'].apply(lastPrice_32_to_42)].loc[books['FinalPrice'].apply(finalPrice_657_to_2000)]
# 可以直接.列名来修改
books = books.loc[books.LastPrice.apply(lastPrice_32_to_42)].loc[books.FinalPrice.apply(finalPrice_657_to_2000)]
books.sort_values(by=['Worthly', 'FinalPrice'], inplace=True, ascending=[True, False])
print(books)

柱状图

import pandas as pd
import matplotlib.pyplot as plt
# 绘图，不要添加index_col='Field'索引列
books = pd.read_excel('./TestExcel/output008.xlsx')
print(books.head(5))
# 按照Number列排序 降序 替换原先的DF
books.sort_values(by='Number', ascending=False, inplace=True)
# 使用pandas绘图：横坐标为Field 纵坐标为Number 颜色橘色 标题xxx
# books.plot.bar(x='Field', y='Number',color='orange',title='InterNationalBooks')
# 使用plt绘图
plt.bar(books.Field, books.Number, color='orange')
# 旋转标签90
plt.xticks(books.Field, rotation=90)
plt.xlabel('Field')
plt.ylabel('Number')
plt.title('InternationalBooks', fontsize=16)
# 紧凑型绘图，能够将横坐标全部显示出来
plt.tight_layout()
plt.show()

分组柱图，深度优化

import pandas as pd
import matplotlib.pyplot as plt
books = pd.read_excel('./TestExcel/output009.xlsx')
print(books)
books.sort_values(by=2021, inplace=True, ascending=False)
books.plot.bar(x='Field', y=[2020, 2021],color=['orange','blue'])
plt.title('National Books', fontsize=16, fontweight='bold')
plt.xlabel('Field', fontweight='bold')
plt.ylabel('Number', fontweight='bold')
# 自定义X轴样式:斜45° 文字最末端对齐
ax = plt.gca()
ax.set_xticklabels(books.Field, rotation=45, ha='right')
# 将图片左边和底部固定
figures = plt.gcf()
figures.subplots_adjust(left=0.2,bottom=0.42)
# plt.tight_layout()
plt.show()

叠加柱状图，水平柱状图

import pandas as pd
import matplotlib.pyplot as plt
users = pd.read_excel('./TestExcel/output010.xlsx')
users['Total'] = users.Oct+users.Nov+users.Dec
users.sort_values(by='Total',ascending=False,inplace=True)
print(users)
# 分组柱状图-->叠加柱状图stacked
users.plot.bar(x='Name',y=['Oct','Nov','Dec'],stacked=True)
plt.title('User Behavior',fontsize=24,fontweight='bold')
# 旋转X轴文字45°
ax = plt.gca()
ax.set_xticklabels(users.Name,rotation=45,ha='right')
plt.tight_layout()
# 水平柱状图
users.sort_values(by='Total',ascending=True,inplace=True)
users.plot.barh(x='Name',y=['Oct','Nov','Dec'],stacked=True)
plt.tight_layout()
plt.show()

饼图

import pandas as pd
import matplotlib.pyplot as plt
students = pd.read_excel('./TestExcel/output011.xlsx',index_col='From')
print(students)
# 绘制饼图,默认情况下以index_col作为图例显示饼图
# 排序以顺时针形式呈现，并将开始位置设置为-250
# students['2017'].sort_values().plot.pie(startangle=-250)
# 第二种方式counterclock=False顺时针方式呈现
students['2017'].plot.pie(counterclock=False)
plt.title('Source of International Students')
plt.ylabel('2017',fontweight='bold')
plt.show()

折线趋势图，叠加区域图

import pandas as pd
import matplotlib.pyplot as plt
weeks = pd.read_excel('./TestExcel/output012.xlsx',index_col='Week')
print(weeks)
print(weeks.columns)
# 折线图
weeks.plot(y=['Accessories', 'Bikes', 'Clothing', 'Components', 'Grand Total'])
plt.title('Sales Weekly Trend',fontsize=16,fontweight='bold')
plt.ylabel('Total',fontsize=12,fontweight='bold')
plt.xticks(weeks.index)
# 叠加区域图
weeks.plot.area(y=['Accessories', 'Bikes', 'Clothing', 'Components', 'Grand Total'])
plt.title('Sales Weekly Trend',fontsize=16,fontweight='bold')
plt.ylabel('Total',fontsize=12,fontweight='bold')
plt.xticks(weeks.index)
plt.show()

散点图、直方图、密度图、数据相关性

import pandas as pd
import matplotlib.pyplot as plt 
# 显示所有列
pd.options.display.max_columns =777
homes = pd.read_excel('./TestExcel/output013.xlsx')
# print(homes.head())
# 散点图scatter
#homes.plot.scatter(x='sqft_living',y='price')
# 直方图hist 分布区间bins
# homes.sqft_living.plot.hist(bins=100)
# plt.xticks(range(0,max(homes.sqft_living),5000),fontsize=8,rotation=90)
# 密度图kde
homes.sqft_living.plot.kde()
plt.xticks(range(0,max(homes.sqft_living),5000),fontsize=8,rotation=90)
plt.show()
# 数据相关性分析 column row relation
print(homes.corr())

多表联合(从VLOOKUP到JOIN)

连接了两张表，一张学生姓名表，一张分数表

from typing import SupportsRound
import pandas as pd 
students = pd.read_excel('./TestExcel/output014.xlsx',sheet_name='Students',index_col='ID')
scores = pd.read_excel('./TestExcel/output014.xlsx',sheet_name='Scores',index_col='ID')
# students表join scores表 how表示不论是否能查询到数据始终保持students表的数据 以ID列来join
# fillna 用0来填充空值
# 如果左右两边ID列名字不一样 left_on right_on
# table = students.merge(scores,how='left',left_on=students.index, right_on=scores.index).fillna(0)
table = students.join(scores,how='left').fillna(0)
# 将浮点数转为原始的int数据
table.Score = table.Score.astype(int)
print(table)

数据校验、轴的概念

import pandas as pd
def validate_score(x):
    """[校验函数]  
    Args:
        x ([object]): [学生信息]
    """    
    try:
        assert 0<=x.Score<=100
    except: 
        print(f'#{x.ID}\tStudent {x.Name} has an invalid score {x.Score}')
    #第二种写法
    #if not 0<=x.Score<=100:
        #print(f'#{x.ID}\tStudent {x.Name} has an invalid score {x.Score}')
students = pd.read_excel('./TestExcel/output015.xlsx')
# axis默认为0,从上到下  1,从左到右进行校验
students.apply(validate_score,axis=1)
# print(students)

把一列数据分割成两列

import pandas as pd
Students = pd.read_excel('./TestExcel/output015.xlsx')
#  一列数据分割成两列expand=True n=0切出来多少子字符串就保留多少
df = Students.Name.str.split('_',expand=True)
Students['StudentTitle'] = df[0].str.upper()
Students['StudentIndex'] = df[1]
print(Students)

求和、求平均、统计引导

import pandas as pd
house_price = pd.read_excel('./TestExcel/output013.xlsx')
# DataFrame.sum()
# axis 1横轴 0纵轴 sum求和 mean求平均
temp_total = house_price[['bedrooms','bathrooms']].sum(axis=1)
temp_average = house_price[['bedrooms','bathrooms']].mean(axis=1)
house_price['TotalRooms'] = temp_total
house_price['AverageRooms'] = temp_average
# house_price['AverageRooms'] .astype(int)
# 追加列方向的累加
col_sum = house_price[['ID','price','bedrooms','bathrooms','sqft_living','sqft_basement']].sum()
col_sum['TotalRooms'] = 'Summary'
# 将自动按照col_sum的列名来进行追加
house_price = house_price.append(col_sum,ignore_index=True)
print(house_price)

定位、消除重复数据

from os import dup
import pandas as pd 
students = pd.read_excel('./TestExcel/output009.xlsx')
row_sum = students[['Firstyear','Lastyear']].sum(axis=1)
students['TotalSales'] = row_sum
col_sum = students[['Firstyear','Lastyear','TotalSales']].sum()
col_sum['Field'] = 'Summary'
students = students.append(col_sum,ignore_index=True)
print(students)
print('\n')
# 取出那些重复的数据
dupe = students.duplicated(subset='Field')
print(dupe.any())
print(dupe.count())
print(dupe[dupe]) #自己和True比较
dupe = dupe[dupe==True]
print(f'{dupe.index}\n')
print(f'{students.iloc[dupe.index]}\n') #取出那些数据
# 消除重复数据，基于哪些列 subset=['Field',Firstyear'] keep='first'/'last'保留开始还是后面的
students.drop_duplicates(subset='Field',inplace=True,keep='first')
print(students)

旋转数据表（行转换）

import pandas as pd
# 显示最大列
pd.options.display.max_columns = 999
videos = pd.read_excel('./TestExcel/output016.xlsx',index_col='Month')
# 旋转表格，要将index_col设置为旋转坐标列Month，不然就会以默认index来作为列名
videos = videos.transpose()
print(videos)

读取CVS、TSV、TXT文件中的数据

import pandas as pd
students1 = pd.read_csv('./TestOtherFiles/Students.csv')
print(students1)
students2 = pd.read_csv('./TestOtherFiles/Students.txt',sep='|')
print(students2)

透视表、分组、聚合(GroupBy)

import pandas as pd
import numpy as np
pd.options.display.max_columns = 777
orders = pd.read_excel('./TestExcel/output017.xlsx')
orders['Year'] = pd.DatetimeIndex(orders.Date).year
print(orders)
piovot_table1=orders.pivot_table(index='Category',columns='Year',values='Total',aggfunc=np.sum)
print(piovot_table1)
groups = orders.groupby(['Category','Year'])
s = groups['Total'].sum()
c = groups['ID'].count()
piovot_table2 = pd.DataFrame({'Sum':s,'Count':c})
print(piovot_table2)

线性回归、数据预测

数据分析和机器学习的边界

import pandas as pd
import matplotlib.pyplot as plt 
from scipy.stats import linregress
def validate_revenue(series):
    if not 5<series.Revenue<20:
        print(f'{series}值不在范围内')
sales = pd.read_excel('./TestExcel/output018.xlsx',dtype={'Month':str})
# 斜率，截距
slope,intercept,r,p,std_err = linregress(sales.index,sales.Revenue)
# 线性回归方程
exp = sales.index*slope+intercept
# sales.plot.area(y='Revenue')
# 散点图
plt.scatter(x=sales.index,y=sales['Revenue'],color='orange')
plt.plot(sales.index,exp,color='blue')
#plt.bar(sales.index,sales.Revenue)
ax = plt.gca()
ax.set_xticklabels(sales['Month'],rotation=45,ha='right')
plt.title(f'y={slope}*X+{intercept}')
plt.tight_layout()
plt.show()
# 横向验证，传入函数的是series
#sales.apply(validate_revenue,axis=1)
#print(sales.Month.dtype)

条件格式化

anacoda input: jupyter notebook

import pandas as pd
def low_score_red(s):
    color = 'red' if s<21000 else 'black'
    return f'color:{color}'
def highest_score_green(col):
    #把字体编程绿色，底层变为黄绿色
    return ['background-color:lime;color:green' if s==col.max() else 'background-color:white' for s in col]
students = pd.read_excel('F:/Repo/albertpython/TestExcel/output011.xlsx')
students.style.applymap(low_score_red,subset=['2016','2017'])\
.apply(highest_score_green,subset=['2016','2017'])

import pandas as pd
import seaborn as sns
color_map = sns.light_palette('green',as_cmap=True)
students = pd.read_excel('F:/Repo/albertpython/TestExcel/output011.xlsx')
# 将颜色深度应用到数据列上
students.style.background_gradient(color_map,subset=['2016','2017'])

import pandas as pd
#import seaborn as sns
#color_map = sns.light_palette('green',as_cmap=True)
students = pd.read_excel('F:/Repo/albertpython/TestExcel/output011.xlsx')
# 将颜色深度应用到数据列上
# students.style.background_gradient(color_map,subset=['2016','2017'])
students.style.bar(color='orange',subset=['2016','2017'])

行操作集锦

import pandas as pd
students1 = pd.read_excel('F:/Repo/albertpython/TestExcel/output011.xlsx',sheet_name='Sheet1')
students2 = pd.read_excel('F:/Repo/albertpython/TestExcel/output011.xlsx',sheet_name='Sheet2')
# 两个Sheet合并为一个,重置index_col drop=True放弃原来的
student_total = students1.append(students2).reset_index(drop=True)
# 追加手动创建的一行
stu = pd.Series({'Rank':100,'From':'USA','2016':90,'2017':100})
student_total=student_total.append(stu,ignore_index=True)
# 更改其中一行的值
student_total['From'].at[13] = 'Mexico3'
student_total.at[12,'From'] = 'Japan3'
# 插入一行 切片技术
stu = pd.Series({'Rank':101,'From':'USB','2016':90,'2017':100})
# 左闭右开
part1 = student_total[:10]
part2 = student_total[10:]
student_total = part1.append(stu,ignore_index=True).append(part2).reset_index(drop=True)
# 删除数据行
# student_total.drop(index=[0,1,2],inplace=True)
# student_total.drop(index=range(4,6),inplace=True)
for i in range(5,8):
    student_total['From'].at[i] =''
missing_data = student_total.loc[student_total['From']=='']
student_total.drop(index=missing_data.index,inplace=True) #切片的方式传入
student_total.reset_index(drop=True) # 重新铺一下index
print(student_total)

列操作集锦

import pandas as pd
import numpy as np
students = pd.read_excel('./TestExcel/output014.xlsx',sheet_name='Students')
scores = pd.read_excel('./TestExcel/output014.xlsx',sheet_name='Scores')
# 列横向拼接
list_total = pd.concat([students,scores],axis=1).reset_index(drop=True)
print(list_total)
# 行追加第二种写法
list_total2 = pd.concat([students,scores],ignore_index=True).reset_index(drop=True)
print(list_total2)
# 将ID相同Join进来
list_total3 = students.merge(scores,how='left',on=['ID','ID'])
# 追加一列：
list_total3['Age'] = 25
list_total3['RealAge'] = np.arange(0,len(list_total3))
print(f'Total3:\n{list_total3}\n')
print(np.arange(0,len(list_total3)))
# 删除列
list_total3.drop(columns=['Age','RealAge'],inplace=True)
print(f'DropTotal3:\n{list_total3}\n')
# 插入列
list_total3.insert(1,column='Foo',value=26)
print(f'DropTotal3:\n{list_total3}\n')
# 列名大写
list_total3.rename(columns={'Foo':'FOO'},inplace=True)
print(f'Upper:\n{list_total3}\n')
# 删除空行
list_total3.dropna(inplace=True)
print(f'Dropnan:\n{list_total3}\n')

读取数据库

import pandas as pd
import numpy as np
import pyodbc as dbc 
# ORM框架 create_engine
import sqlalchemy
books = pd.read_excel('./TestExcel/output019.xlsx')
print(books)
# 此处注意Server={ServerName} ServerName未必是{local}
connection = dbc.connect('Driver={SQL Server};'
                      'Server={DESKTOP-07OJ9VD};'
                      'Database=AlbertBook;'
                      'Trusted_Connection=yes;')
query = 'Select Title from T_Books'
df1 = pd.read_sql_query(query,connection)
print(df1)

编写复杂方程

import pandas as pd
import numpy as np
def cal_bike_complex_clothing(b,c):
    return b**2+c*np.pi
sales = pd.read_excel('./TestExcel/output012.xlsx')
sales['complex'] = sales.apply(lambda row:cal_bike_complex_clothing(row['Bikes'],row['Clothing']),axis=1)
#将列名大写
sales.rename(columns={'complex':'Complex'},inplace=True)
sales.drop(columns={'Components'},inplace=True)
print(sales)

01_DotNet技术官

02 Pandas玩转Excel

创建虚拟数据

创建文件

读取文件

行，列，单元格

数据区域的读取，填充数字，文字序列

填充日期序列

函数填充，计算列

排序

数据筛选、过滤

柱状图

分组柱图，深度优化

叠加柱状图，水平柱状图

饼图

折线趋势图，叠加区域图

散点图、直方图、密度图、数据相关性

多表联合(从VLOOKUP到JOIN)

数据校验、轴的概念

把一列数据分割成两列

求和、求平均、统计引导

定位、消除重复数据

旋转数据表（行转换）

读取CVS、TSV、TXT文件中的数据

透视表、分组、聚合(GroupBy)

线性回归、数据预测

条件格式化

行操作集锦

列操作集锦

读取数据库

编写复杂方程