"""Benchmark ``DataFrame.query('A > 4')`` against boolean-mask indexing.

Both approaches select the rows whose column 'A' is greater than 4; the
script times each of them on frames of increasing size.

Reference measurements recorded by the original notebook run, which timed
each statement once with ``%time`` (query statement first, mask second):

    N            approach   user      sys       total     wall
    10           query      9.29 ms   3.84 ms   13.1 ms   11.7 ms
    10           mask       681 µs    0 ns      681 µs    637 µs
    1000         query      988 µs    2.53 ms   3.52 ms   2.64 ms
    1000         mask       1.64 ms   0 ns      1.64 ms   1.18 ms
    100000       query      5.78 ms   2.95 ms   8.73 ms   8.24 ms
    100000       mask       3.11 ms   781 µs    3.89 ms   3.45 ms
    10000000     query      448 ms    416 ms    864 ms    568 ms
    10000000     mask       218 ms    173 ms    391 ms    390 ms

In that run the very first ``query`` call (N=10) was slower than the later
call on 1000 rows, which suggests one-time overhead was included in the
measurement. This version therefore performs an untimed warm-up call and
reports the best of several repetitions instead of a single shot.
"""
import timeit

import numpy as np
import pandas as pd

# Row counts to benchmark, in the order used by the original run.
SIZES = (10, 1000, 100000, 10000000)


def make_data(n):
    """Return an ``n``-row DataFrame with columns 'A' and 'B'.

    Values are drawn with ``np.random.uniform(1, 9, (n, 2))``.
    """
    return pd.DataFrame(np.random.uniform(1, 9, (n, 2)), columns=['A', 'B'])


def compare_filters(data, repeat=5):
    """Time both ways of selecting the rows of ``data`` where ``A > 4``.

    Args:
        data: DataFrame containing a numeric column 'A'.
        repeat: Number of timed calls per approach; the fastest one is kept.

    Returns:
        A tuple ``(query_seconds, mask_seconds, data1, data2)`` where
        ``data1`` is the result of ``data.query('A > 4')``, ``data2`` is the
        result of ``data[data['A'] > 4]``, and the two timings are the best
        wall-clock durations in seconds of a single call.
    """

    def by_query():
        return data.query('A > 4')

    def by_mask():
        return data[data['A'] > 4]

    # Untimed first calls: they produce the results handed back to the caller
    # and keep any first-call overhead out of the measurements below.
    data1 = by_query()
    data2 = by_mask()

    # number=1 times individual calls; min() keeps the least-disturbed run.
    query_seconds = min(timeit.repeat(by_query, number=1, repeat=repeat))
    mask_seconds = min(timeit.repeat(by_mask, number=1, repeat=repeat))
    return query_seconds, mask_seconds, data1, data2


if __name__ == "__main__":
    for N in SIZES:
        data = make_data(N)
        query_seconds, mask_seconds, data1, data2 = compare_filters(data)
        print(
            f"N={N:>10,}  "
            f"query: {query_seconds * 1e3:10.3f} ms  "
            f"mask: {mask_seconds * 1e3:10.3f} ms"
        )

    显然,在上述各个数据规模的测试中,布尔索引 data[data['A'] > 4] 相比 data.query('A > 4') 耗时更短,更有优势。