"""Benchmark pandas HDF5 compression libraries by output size and write time.

The same random DataFrame is written once per compression method. For each
method one tab-separated line is printed: method name, resulting file size in
bytes, and the time spent writing in seconds.
"""
import os
import time

import numpy as np
import pandas as pd

# Test payload: 10 million rows by 4 columns of standard-normal samples.
data = np.random.standard_normal((10 ** 7, 4))
df = pd.DataFrame(data)

# 'default' is the uncompressed baseline; every other entry is passed to
# DataFrame.to_hdf as the `complib` value.
methods = [
    'default',
    "zlib",
    "lzo",
    "bzip2",
    "blosc",
    'blosc:blosclz',
    "blosc:lz4",
    "blosc:lz4hc",
    "blosc:snappy",
    "blosc:zlib",
    "blosc:zstd",
]

# Make sure the output directory exists so the first write does not fail on a
# fresh checkout.
os.makedirs("./data", exist_ok=True)

for method in methods:
    if method == 'default':
        # Baseline run: no compression library and no compression level.
        level, comp = None, None
    else:
        # Highest compression level for every real compressor.
        level, comp = 9, method

    # ':' is swapped for '-' to keep the file name portable.
    file = f"./data/{method.replace(':', '-')}.h5"

    # perf_counter is a monotonic, high-resolution clock; only the write
    # itself is timed.
    start_time = time.perf_counter()
    # mode='w' starts from an empty file on every run; the default append
    # mode would reuse a file left over from a previous run and could skew
    # the size reported below.
    df.to_hdf(file, key='df', mode='w', complevel=level, complib=comp)
    elapsed = time.perf_counter() - start_time

    print(method, os.path.getsize(file), elapsed, sep='\t')

    性能比较:

    method          size (bytes)    time (s)
    -------------------------------------------------
    default         400007240       2.722801923751831
    zlib            283500043       166.24143624305725
    lzo             305977502       0.4248645305633545
    bzip2           291635531       35.23882055282593
    blosc           309459970       1.6485953330993652
    blosc:blosclz   309459994       1.662555456161499
    blosc:lz4       305701404       0.4089069366455078
    blosc:lz4hc     293242558       16.87893295288086
    blosc:snappy    324139336       0.3809819221496582
    blosc:zlib      281797972       164.94474864006042
    blosc:zstd      287126493       42.24786901473999