单列拆分
效果预览:下图是按Country列拆分
if __name__ == '__main__':
    import pandas as pd

    # Build a small sample dataframe whose Country cells hold two
    # comma-separated names each.
    df = pd.DataFrame({
        'Country': ['China,US', 'Japan,EU', 'UK,Australia', 'Singapore,Netherland'],
        'Number': [100, 150, 120, 90],
        'Value': [1, 2, 3, 4],
        'label': list('abcd'),
    })

    # Split the Country column: expand each cell into one column per piece,
    # stack those columns into a single Series, and drop the inner index
    # level so every piece carries the label of the row it came from.
    country_parts = df['Country'].str.split(',', expand=True)
    country_rows = country_parts.stack().reset_index(level=1, drop=True).rename('Country')

    # Joining on the index repeats the other columns once per piece.
    split_result = df.drop('Country', axis=1).join(country_rows)

    print(df)
    print("*"*20, "拆分后效果", "*"*20)
    print(split_result)
多列拆分
效果预览:按Color列和Size列拆分
# Reusable helper that wraps the "split one cell into several rows" logic
def df_split_row(df, split_cols: list, sep: str = ',', strip: bool = True):
    """
    Split delimiter-separated string columns into multiple rows.

    Each column named in ``split_cols`` is split on ``sep`` and every piece
    gets its own row; the remaining columns are repeated for each piece.
    When several columns are given they are expanded one after another, so
    a source row yields every combination of its pieces (a cross product).
    The split columns are placed after the untouched columns, in the order
    given, and the result has a fresh 0..n-1 index.

    :param df: dataframe to split; it is not modified
    :param split_cols: column name, or list of column names, to split
    :param sep: delimiter handed to ``Series.str.split``; note that pandas
        may interpret patterns longer than one character as regular
        expressions
    :param strip: when True (default) remove whitespace around each piece, so
        ``'a, b'`` yields ``'a'`` and ``'b'`` rather than ``'a'`` and ``' b'``
    :return: new dataframe with one row per piece
    """
    # Accept a bare column name; iterating a str would walk its characters.
    if isinstance(split_cols, str):
        split_cols = [split_cols]

    # The pieces are matched back to their source rows through the index.
    # The result index is discarded at the end anyway, so normalise it up
    # front: duplicate labels in the caller's index would otherwise
    # cross-match unrelated rows during the join.
    source = df.reset_index(drop=True)
    result = source.drop(columns=split_cols)

    for column in split_cols:
        # explode() gives one row per piece and keeps the source row label
        # on each; unlike split(expand=True).stack() it does not rely on
        # stack() discarding the padding cells of rows with fewer pieces.
        pieces = source[column].str.split(sep).explode()
        if strip:
            pieces = pieces.str.strip()
        # The Series keeps the column's name, so join() adds it back as-is.
        result = result.join(pieces)

    return result.reset_index(drop=True)
if __name__ == '__main__':
    import pandas as pd

    # Build a one-row sample dataframe with two multi-valued columns.
    sample = {
        'Code': ['212027'],
        'Color': ['blue, pink, yellow'],
        'Size': ['12-18M, 2-3Y, 3-4Y'],
    }
    df = pd.DataFrame(sample)

    # Show the result of splitting both columns, then Size only.
    for cols in (['Color', 'Size'], ['Size']):
        print(df_split_row(df, split_cols=cols))