"""Benchmark pandas HDF5 writes across the available compression libraries.

For every entry in ``methods`` a 10**7 x 4 frame of standard-normal floats is
written to ``./data/<method>.h5`` and the resulting file size (bytes) and the
elapsed write time (seconds) are printed as one tab-separated line.
"""
import os
import time

import numpy as np
import pandas as pd

# Compression libraries to compare. 'default' is the baseline: it is written
# with complevel=None and complib=None, i.e. without requesting compression.
methods = [
    'default',
    "zlib",
    "lzo",
    "bzip2",
    "blosc",
    'blosc:blosclz',
    "blosc:lz4",
    "blosc:lz4hc",
    "blosc:snappy",
    "blosc:zlib",
    "blosc:zstd",
]


def compression_settings(method):
    """Return the ``(complevel, complib)`` pair to pass to ``to_hdf``.

    The 'default' baseline maps to ``(None, None)``; every other method is
    benchmarked at level 9 with the method name used as the library.
    """
    if method == 'default':
        return None, None
    return 9, method


def output_path(method, directory="./data"):
    """Return the .h5 path for *method* inside *directory*.

    ':' in names such as 'blosc:lz4' is replaced with '-' in the file name.
    """
    return f"{directory}/{method.replace(':', '-')}.h5"


def benchmark(df, method, directory="./data"):
    """Write *df* using *method* and return ``(size_in_bytes, seconds)``.

    Only the ``to_hdf`` call is timed; path construction and the size lookup
    are kept outside the measured interval.
    """
    level, comp = compression_settings(method)
    file = output_path(method, directory)
    # perf_counter is monotonic, so clock adjustments cannot skew the result.
    start_time = time.perf_counter()
    # key is passed by keyword: recent pandas releases deprecate positional
    # arguments after the path. mode='w' starts from a fresh file so a stale
    # file left by an earlier run cannot influence the measured size.
    df.to_hdf(file, key='df', mode='w', complevel=level, complib=comp)
    elapsed = time.perf_counter() - start_time
    return os.path.getsize(file), elapsed


def main():
    """Run the benchmark for every method and print one result row each."""
    data = np.random.standard_normal((10 ** 7, 4))
    df = pd.DataFrame(data)
    # to_hdf does not create missing parent directories.
    os.makedirs("./data", exist_ok=True)
    for method in methods:
        size, elapsed = benchmark(df, method)
        print(method, size, elapsed, sep='\t')


if __name__ == "__main__":
    main()
性能比较:
method          size        time
-------------------------------------------------
default         400007240   2.722801923751831
zlib            283500043   166.24143624305725
lzo             305977502   0.4248645305633545
bzip2           291635531   35.23882055282593
blosc           309459970   1.6485953330993652
blosc:blosclz   309459994   1.662555456161499
blosc:lz4       305701404   0.4089069366455078
blosc:lz4hc     293242558   16.87893295288086
blosc:snappy    324139336   0.3809819221496582
blosc:zlib      281797972   164.94474864006042
blosc:zstd      287126493   42.24786901473999
