- concat:沿指定轴拼接表(axis=0 纵向堆叠、按列名对齐;axis=1 横向拼接、按行索引对齐)
- merge:根据 key 列合并表(类似 SQL 的 join)
1. pd.concat 函数使用
import pandas as pd
# Example: combining two frames with pd.concat.
# Both frames get explicit row labels; they share "a1"/"a2" and differ on
# the remaining two labels, which makes the axis=1 alignment visible.
left = pd.DataFrame(
    {
        "key1": ["K0", "K0", "K1", "K2"],
        "key2": ["K0", "K1", "K0", "K1"],
        "A": ["A0", "A1", "A2", "A3"],
        "B": ["B0", "B1", "B2", "B3"],
    },
    index=["a1", "a2", "a3", "a4"],
)
right = pd.DataFrame(
    {
        "key1": ["K0", "K1", "K1", "K2"],
        "key2": ["K0", "K0", "K0", "K0"],
        "C": ["C0", "C1", "C2", "C3"],
        "D": ["D0", "D1", "D2", "D3"],
    },
    index=["a1", "a2", "b3", "b4"],
)

# Stack the rows of both frames; values are aligned on column names, and
# columns present in only one frame are filled with NaN.
pd.concat([left, right], axis="index")

# Place the frames side by side; values are aligned on row labels, and
# labels present in only one frame are filled with NaN.
pd.concat([left, right], axis="columns")
2. pd.merge 函数使用
import pandas as pd
# Example: combining two frames with merge on key columns.
# The row labels assigned here are not used by the joins below: a merge on
# columns ignores both indexes and the result gets a fresh integer index.
left = pd.DataFrame(
    {
        "key1": ["K0", "K0", "K1", "K2"],
        "key2": ["K0", "K1", "K0", "K1"],
        "A": ["A0", "A1", "A2", "A3"],
        "B": ["B0", "B1", "B2", "B3"],
    },
    index=["a1", "a2", "a3", "a4"],
)
right = pd.DataFrame(
    {
        "key1": ["K0", "K1", "K1", "K2"],
        "key2": ["K0", "K0", "K0", "K0"],
        "C": ["C0", "C1", "C2", "C3"],
        "D": ["D0", "D1", "D2", "D3"],
    },
    index=["a1", "a2", "b3", "b4"],
)

# Left join: keep every (key1, key2) pair of the left frame and attach the
# matching rows of the right frame; unmatched pairs get NaN in C and D.
left.merge(right, how="left", on=["key1", "key2"])

# Outer join: keep the union of the (key1, key2) pairs of both frames.
left.merge(right, how="outer", on=["key1", "key2"])

# Inner join: keep only the (key1, key2) pairs present in both frames.
left.merge(right, how="inner", on=["key1", "key2"])