1. import pandas as pd
    2. import numpy as np
    def f1(a1, a2):
        """Count membership hits between two containers, in both directions.

        Every element of ``a1`` that is found in ``a2`` counts once, and every
        element of ``a2`` that is found in ``a1`` counts once. Duplicates are
        counted each time they occur. For two strings this compares them
        character by character using substring membership.

        Args:
            a1: A re-iterable container supporting ``in`` (e.g. ``str``, ``list``).
            a2: A re-iterable container supporting ``in``.

        Returns:
            The total number of hits from both directions as an ``int``.
        """
        # Direction 1: elements of a1 that also occur in a2.
        hits_forward = sum(1 for item in a1 if item in a2)
        # Direction 2: elements of a2 that also occur in a1.
        hits_backward = sum(1 for item in a2 if item in a1)
        return hits_forward + hits_backward
    def f3(a1, a2):
        """Apply ``f1`` pairwise to two parallel iterables.

        Args:
            a1: Iterable of left-hand containers.
            a2: Iterable of right-hand containers.

        Returns:
            A list holding ``f1(x, y)`` for each aligned pair ``(x, y)``.
            As with ``map``, iteration stops at the shorter input.
        """
        # f1 already takes two positional arguments, so it can be handed to
        # map directly without a wrapping lambda.
        return list(map(f1, a1, a2))
    16. N = 5
    17. data = pd.DataFrame(np.random.uniform(1,9,(N,2)), columns=['A', 'B'], dtype=str)
    18. %timeit data1 = data['A'].combine(data['B'], f1)
    19. %timeit data1 = data.apply(lambda x: f1(x['A'], x['B']), axis=1)
    20. %timeit data1 = f3(data['A'], data['B'])
    21. %timeit data1 = [f1(x, y) for x, y in zip(data['A'], data['B'])]
    22. %timeit data1 = f3(data['A'].values, data['B'].values)
    23. # 218 µs ± 4.19 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
    24. # 698 µs ± 1.07 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
    25. # 37 µs ± 20.5 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
    26. # 36.5 µs ± 26.5 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
    27. # 16.9 µs ± 15.3 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
    28. N = 500
    29. data = pd.DataFrame(np.random.uniform(1,9,(N,2)), columns=['A', 'B'], dtype=str)
    30. %timeit data1 = data['A'].combine(data['B'], f1)
    31. %timeit data1 = data.apply(lambda x: f1(x['A'], x['B']), axis=1)
    32. %timeit data1 = f3(data['A'], data['B'])
    33. %timeit data1 = [f1(x, y) for x, y in zip(data['A'], data['B'])]
    34. %timeit data1 = f3(data['A'].values, data['B'].values)
    35. # 13 ms ± 11.7 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
    36. # 11.2 ms ± 18.8 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
    37. # 1.02 ms ± 1.47 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
    38. # 982 µs ± 733 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)
    39. # 1.01 ms ± 1 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
    40. N = 50000
    41. data = pd.DataFrame(np.random.uniform(1,9,(N,2)), columns=['A', 'B'], dtype=str)
    42. %timeit data1 = data['A'].combine(data['B'], f1)
    43. %timeit data1 = data.apply(lambda x: f1(x['A'], x['B']), axis=1)
    44. %timeit data1 = f3(data['A'], data['B'])
    45. %timeit data1 = [f1(x, y) for x, y in zip(data['A'], data['B'])]
    46. %timeit data1 = f3(data['A'].values, data['B'].values)
    47. # 1.29 s ± 3.74 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
    48. # 1.04 s ± 2.83 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
    49. # 99.6 ms ± 58.6 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
    50. # 96.7 ms ± 121 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)

    显然，`.apply` 方法的效率并不高：在 N = 50000 时，`.apply`（约 1.04 s）和 `.combine`（约 1.29 s）比 `map` 或列表推导式（约 0.1 s）慢了大约一个数量级。