列表去重
以前经常用遍历或者科学计算库等方法,其实有一个很简单的技巧,那就是利用集合的互异性
>>> a
[0, 1, 2, 3, 1]
>>> b = set(a)
>>> b
{0, 1, 2, 3}
>>> list(b)
[0, 1, 2, 3]
列表中去除重复字典
注意:该写法要求字典的值可哈希,因此对嵌套的字典(或值为列表的字典)无效,且去重后的顺序不保证与原列表一致:[dict(t) for t in set([tuple(d.items()) for d in the_list])]
# Deduplicate the dicts in ``li`` by their (key, value) pairs.
# frozenset makes the comparison independent of key insertion order, so
# {'a': 1, 'b': 2} and {'b': 2, 'a': 1} count as the same entry, and the
# keyed dict keeps first-seen order instead of arbitrary set order.
# Values must still be hashable: nested dicts/lists raise TypeError.
li = [dict(item) for item in {frozenset(d.items()): d for d in li}.values()]
修改删除列表部分值
>>> a = [1, 2, 3, 4, 5]
>>> a[2:3] = [0, 0]
>>> a
[1, 2, 0, 0, 4, 5]
>>> a[1:1] = [8, 9]
>>> a
[1, 8, 9, 2, 0, 0, 4, 5]
>>> a[1:-1] = []
>>> a
[1, 5]
压缩和解压缩
# 压缩
import zipfile
import os
def zipDir(dirpath, outFullName):
    """Compress every file below a directory into a zip archive.

    Files are stored under their path relative to ``dirpath``, so the
    archive holds the folder's contents and not the folder itself.
    Empty sub-directories are not stored.

    :param dirpath: path of the directory to compress
    :param outFullName: destination path of the archive (e.g. xxxx.zip)
    :return: None
    """
    out_abs = os.path.abspath(outFullName)
    # The context manager closes the archive even if a write fails.
    with zipfile.ZipFile(outFullName, "w", zipfile.ZIP_DEFLATED) as archive:
        for path, _dirnames, filenames in os.walk(dirpath):
            for filename in filenames:
                full_path = os.path.join(path, filename)
                # Do not add the archive to itself when it lives inside dirpath.
                if os.path.abspath(full_path) == out_abs:
                    continue
                # relpath strips only the leading root; str.replace would also
                # remove later occurrences of the root string inside the path.
                archive.write(full_path, os.path.relpath(full_path, dirpath))
# NOTE(review): purpose of this print is not evident from the snippet; looks like a debug marker - confirm.
print(1)
startdir = "MathAmino/train_models" # path of the folder to compress
file_news = 'mytrain.zip' # file name of the resulting zip archive
zipDir(startdir,file_news)
# Extract
# Open the archive read-only and unpack every member into the target folder.
import zipfile

# The context manager closes the archive handle once extraction finishes
# (or fails), instead of leaving it open for the rest of the session.
with zipfile.ZipFile("/home/kesci/MathAmino.zip", 'r') as f:
    f.extractall("/home/kesci/work/")
列表组合为字典
>>> a = [1, 2, 3]
>>> b = ['a', 'b', 'c']
>>> z = list(zip(a, b))
>>> z
[(1, 'a'), (2, 'b'), (3, 'c')]
>>> list(zip(*z))
[(1, 2, 3), ('a', 'b', 'c')]
两个一一对应列表转化为字典
>>> a = [1, 2, 3]
>>> b = ['a', 'b', 'c']
>>> z = list(zip(a, b))
>>> z
[(1, 'a'), (2, 'b'), (3, 'c')]
>>> d = dict(z)
>>> d
{1: 'a', 2: 'b', 3: 'c'}
翻转字典
>>> m = {'a': 1, 'b': 2, 'c': 3, 'd': 4}
>>> list(m.items())
[('a', 1), ('b', 2), ('c', 3), ('d', 4)]
>>> list(zip(m.values(), m.keys()))
[(1, 'a'), (2, 'b'), (3, 'c'), (4, 'd')]
>>> mi = dict(zip(m.values(), m.keys()))
>>> mi
{1: 'a', 2: 'b', 3: 'c', 4: 'd'}
集合操作
>>> A = {1, 2, 3, 3}
>>> A
{1, 2, 3}
>>> B = {3, 4, 5, 6, 7}
>>> B
{3, 4, 5, 6, 7}
>>> A | B
{1, 2, 3, 4, 5, 6, 7}
>>> A & B
{3}
>>> A - B
{1, 2}
>>> B - A
{4, 5, 6, 7}
>>> A ^ B
{1, 2, 4, 5, 6, 7}
>>> (A ^ B) == ((A - B) | (B - A))
True
在列表中找指定元素位置
使用 list 的 index 方法可以找到 list 中第一次出现该元素的位置
>>> l = ['a', 'b', 'c', 'c', 'd', 'c']
>>> find = 'b'
>>> l.index(find)
1
找出出现该元素的所有位置可以使用一个简单的列表推导式来实现
>>> find = 'c'
>>> [i for i, v in enumerate(l) if v == find]
[2, 3, 5]
列表倒序
>>> x = [1,5,2,3,4]
>>> x.reverse()
>>> x
[4, 3, 2, 5, 1]
找出列表中最大或最小的三个
# -*- coding: utf-8 -*-
import heapq
nums = [1, 8, 2, 23, 7, -4, 18, 23, 24, 37, 2]
# Rank the indices by the value they point at. Looking values up again with
# nums.index() would return the first occurrence every time, so tied values
# (23 and 2 appear twice here) could yield the same index more than once.
# Indices of the 3 largest values
max_num_index_list = heapq.nlargest(3, range(len(nums)), key=nums.__getitem__)
# Indices of the 3 smallest values
min_num_index_list = heapq.nsmallest(3, range(len(nums)), key=nums.__getitem__)
# Both results are plain lists, so they stay usable after printing.
print(max_num_index_list)
print(min_num_index_list)
列表平均分为n份
def func(listTemp, n):
    """Yield consecutive slices of ``listTemp`` holding ``n`` items each.

    The last slice is shorter when ``len(listTemp)`` is not a multiple of
    ``n``. This is a generator, so wrap the call in ``list()`` to get all
    chunks at once.

    :param listTemp: sliceable sequence to split (list, tuple, str, ...)
    :param n: number of items per chunk; 0 makes ``range`` raise
        ValueError on first iteration, a negative value yields nothing
    :return: generator of slices of ``listTemp``
    """
    for i in range(0, len(listTemp), n):
        yield listTemp[i:i + n]
平均分为n份
def average_func(m, n):
    """Split ``m`` into ``n`` consecutive parts of near-equal length.

    When ``len(m)`` is not divisible by ``n``, the first ``len(m) % n``
    parts receive one extra item. If ``n`` exceeds ``len(m)`` the trailing
    parts are empty.

    :param m: sliceable sequence to split
    :param n: number of parts; 0 raises ZeroDivisionError, a negative
        value returns an empty list
    :return: list of ``n`` slices of ``m``
    """
    base, extra = divmod(len(m), n)
    ret = []
    start = 0
    for i in range(n):
        # The first `extra` parts absorb the remainder, one item each.
        size = base + 1 if i < extra else base
        ret.append(m[start:start + size])
        start += size
    return ret
统计列表中重复元素的个数
>>> from collections import Counter
>>> Counter([1,2,2,2,2,3,3,3,4,4,4,4])
Counter({2: 4, 4: 4, 3: 3, 1: 1})
查看当前文件夹中文件
# Walk the "try_test" directory tree top-down and print, for each directory,
# its path, its sub-directory names and its file names.
for dirpath, dirnames, filenames in os.walk("try_test"):
    print(dirpath, dirnames, filenames)
存格式化的json文件
# Serialize with a 4-space indent; ensure_ascii=False keeps non-ASCII text
# (e.g. Chinese) readable instead of escaping it as \uXXXX.
json_str = json.dumps(your_list, ensure_ascii=False, indent=4)
# Write as UTF-8 explicitly: with ensure_ascii=False the output may contain
# characters that the platform's default encoding cannot represent.
with open(your_path, 'w', encoding='utf-8') as json_file:
    json_file.write(json_str)
读取json文件
# Read the JSON file as UTF-8 explicitly so decoding does not depend on the
# platform's default encoding.
with open("../config/record.json", 'r', encoding='utf-8') as load_f:
    load_dict = json.load(load_f)