os

os.linesep

'\r\n'

subprocess

import subprocess as sp

# Locate the conda executable. An argument list avoids spawning a shell, and
# the original f-string had no placeholders, so a plain literal is enough.
# stdout/stderr are captured on the returned CompletedProcess.
sp.run(['which', 'conda'], capture_output=True)

configparser

ini文件

# abspath('') resolves to the absolute path of the current working directory.
self.working_directory = os.path.abspath('')
# Read the INI file and keep the (option, value) pairs of its
# [PBS_template] section.
# NOTE(review): pbs_config is defined outside this fragment — presumably the
# path to the INI file; confirm against the caller.
cf = configparser.ConfigParser()
cf.read(pbs_config)
self.items = cf.items("PBS_template")

日期 & 时间 datetime time

import time
import datetime

# Current time as a float number of seconds since the epoch.
now = time.time()
now
# Convert the timestamp into a struct_time expressed in local time.
localtime = time.localtime(now)
localtime

1607146324.9261053
time.struct_time(tm_year=2020, tm_mon=12, tm_mday=5, tm_hour=13, tm_min=32, tm_sec=4, tm_wday=5, tm_yday=340, tm_isdst=0)

# Abbreviated weekday and month names, day, 24-hour time, two-digit year.
time.strftime("%a %b %d %H:%M:%S %y",localtime)
# The '#' flag is the Windows spelling for dropping a leading zero
# (the recorded output shows the 12-hour field as '1', not '01').
time.strftime('%#m/%d/%Y %#I:%M%p',localtime)

'Sat Dec 05 13:32:04 20'
'12/05/2020 1:32PM'

# Format the current local time as YYYY/MM/DD HH:MM:SS.
time.strftime('%Y/%m/%d %H:%M:%S',time.localtime())

'2020/12/05 13:32:49'

%y 两位数的年份表示（00-99）
%Y 四位数的年份表示（0001-9999）
%m 月份（01-12）
%d 月内中的一天（01-31）
%H 24小时制小时数（0-23）
%I 12小时制小时数（01-12）
%M 分钟数（00-59）
%S 秒（00-61，60 和 61 仅用于闰秒等特殊情况）
%a 本地简化星期名称
%A 本地完整星期名称
%b 本地简化的月份名称
%B 本地完整的月份名称
%c 本地相应的日期表示和时间表示
%j 年内的一天（001-366）
%p 本地A.M.或P.M.的等价符
%U 一年中的星期数（00-53）星期天为星期的开始
%w 星期（0-6），星期天为星期的开始
%W 一年中的星期数（00-53）星期一为星期的开始
%x 本地相应的日期表示
%X 本地相应的时间表示
%Z 当前时区的名称
%% %号本身
%#（Windows）/ %-（Linux）去掉 %m、%d 等前面补的前导 0（属于平台相关的扩展写法）

# Parse a date string into a struct_time; the hour, minute and second fields,
# which the format does not mention, come back as 0.
day = time.strptime('2020-08-13','%Y-%m-%d')
day
# A struct_time can be passed straight back to strftime for formatting.
time.strftime("%a %b %d %H:%M:%S %y",day)

time.struct_time(tm_year=2020, tm_mon=8, tm_mday=13, tm_hour=0, tm_min=0, tm_sec=0, tm_wday=3, tm_yday=226, tm_isdst=-1)
'Thu Aug 13 00:00:00 20'

# now() and today() both give the current local date and time; the recorded
# reprs carry no tzinfo, i.e. the objects are naive.
datetime.datetime.now()
datetime.datetime.today()
now = datetime.datetime.now()
# The individual fields are plain attributes.
now.year,now.month,now.day,now.hour,now.minute,now.second
# Split the datetime into a date object and a time object.
now.date(),now.time()
# Adding a timedelta shifts the datetime, here by one day.
now + datetime.timedelta(days=1)

datetime.datetime(2020, 11, 22, 19, 19, 5, 495930)
datetime.datetime(2020, 11, 22, 19, 19, 5, 506972)
(2020, 11, 22, 19, 19, 5)
(datetime.date(2020, 11, 22), datetime.time(19, 19, 5, 519924))
datetime.datetime(2020, 11, 23, 19, 19, 5, 519924)

正则表达式 re

正则表达式(regular expression)描述了一种字符串匹配的模式（pattern），可以用来检查一个串是否含有某种子串、将匹配的子串替换或者从某个串中取出符合某个条件的子串等。
正则很有用，此处用python讲解一下正则的用法
学习正则的一个网站 https://regexr.com/ 只不过是英文而且正则的写法是基于js的。

import re

a = "我的邮箱是yangjingkang@126.com"
# Group 1 captures the user name before '@'; group 2 captures the domain,
# matching lazily up to the literal '.com'.
groups = re.search(r'([a-zA-Z0-9_]*)@(\w*?\.com)', a)
groups
# group(0) is the whole match; group(1) and group(2) are the two captures.
groups.group(0)
groups.group(1)
groups.group(2)

<_sre.SRE_Match object; span=(5, 25), match='yangjingkang@126.com'>
'yangjingkang@126.com'
'yangjingkang'
'126.com'

a = '我的好孩子'
# With a str pattern, \w also matches non-ASCII word characters, so the whole
# Chinese string is matched (see the recorded span of (0, 5)).
re.search(r'\w*', a)

<_sre.SRE_Match object; span=(0, 5), match='我的好孩子'>

# Insert the missing hyphen after a leading 'ERCC' unless a '-' already
# follows it (negative lookahead).
# NOTE(review): `i` and `df` are not defined in this snippet — presumably `i`
# is a column name and `df` a pandas DataFrame; confirm against the source.
re.sub('^ERCC(?!-)','ERCC-',i)
# The column names that start with 'ERCC' but not with 'ERCC-'.
[i for i in df.columns if i.startswith('ERCC') and not i.startswith('ERCC-')]
import re
a = 'echo ${path_to_gatk}'
# \1 in the replacement re-inserts the captured name, so only the leading
# 'echo ' is removed from the string.
re.sub(r'echo \$\{path_to_([a-zA-Z]+)\}',r'${path_to_\1}',a)

爬虫 requests

import requests
# Create a session object to hold the cookies set below.
s = requests.session()
cookies = {'s':1000}
# Replace the session's cookie jar with one built from the dict above.
s.cookies = requests.utils.cookiejar_from_dict(cookies)

https://blog.csdn.net/g28757/article/details/109363717 selenium配置Edge 全屏 更改ua 等

多进程 multiprocessing

#!/usr/bin/env python
# -*- coding: utf-8 -*-
import multiprocessing as mp
import random


def do_some_thing(results1):
    """Append one random integer between 1 and 10 (inclusive) to ``results1``.

    ``results1`` only needs an ``append`` method. In the script below it is a
    ``Manager().list()`` proxy, so appends made inside worker processes are
    visible to the parent process.
    """
    results1.append(random.randint(1, 10))


if __name__ == "__main__":
    # The guard stops worker processes from re-running the pool setup when
    # they import this module (needed with the "spawn" start method).
    manager = mp.Manager()
    pool = mp.Pool()
    results = manager.list()
    # args must be a tuple: ``(results)`` is just ``results`` itself, which
    # apply_async would unpack element by element instead of passing whole.
    pending = [
        pool.apply_async(do_some_thing, args=(results,))
        for _ in range(10)
    ]
    pool.close()
    pool.join()
    for task in pending:
        # get() re-raises any exception raised in the worker; without it a
        # failing task would go unnoticed.
        task.get()
    # Copy the proxy into a plain list so the collected values are printed.
    print(list(results))

高效循环 itertools

无限迭代器
count(firstval=0, step=1) 创建一个从 firstval (默认值为 0) 开始，以 step (默认值为 1) 为步长的无限整数迭代器

cycle(iterable) 对 iterable 中的元素反复执行循环，返回迭代器

repeat(object[, times]) 反复生成 object，如果给定 times，则重复次数为 times，否则为无限

有限迭代器

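chain() compress() dropwhile() groupby() filterfalse() islice() starmap() tee() takewhile() zip_longest()（Python 2 中的 ifilter()、imap()、izip() 在 Python 3 中已移除，直接使用内置的 filter()、map()、zip()；ifilterfalse()、izip_longest() 分别更名为 filterfalse()、zip_longest()）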

import itertools
# The itertools functions return lazy iterator objects; wrap them in list()
# to see the values.
itertools.combinations([1,2,3],2)
# All 2-element combinations, without repeating an element.
list(itertools.combinations([1,2,3],2))
# Same, but an element may be paired with itself.
list(itertools.combinations_with_replacement([1,2,3],2))
# Every ordering of the three elements.
list(itertools.permutations([1,2,3]))
# Cartesian product of the two lists (shown twice in these notes).
list(itertools.product([1,2,3],[1,2]))
list(itertools.product([1,2,3],[1,2]))
# Concatenate several iterables into one sequence.
list(itertools.chain([1,2,3],[3,4,5],[4,5,6]))
# Like zip, but pads the shorter input with None up to the longest one.
list(itertools.zip_longest([1,2,3],[1,2]))

<itertools.combinations at 0x18d4a4adc78>
[(1, 2), (1, 3), (2, 3)]
[(1, 1), (1, 2), (1, 3), (2, 2), (2, 3), (3, 3)]
[(1, 2, 3), (1, 3, 2), (2, 1, 3), (2, 3, 1), (3, 1, 2), (3, 2, 1)]
[(1, 1), (1, 2), (2, 1), (2, 2), (3, 1), (3, 2)]
[(1, 1), (1, 2), (2, 1), (2, 2), (3, 1), (3, 2)]
[1, 2, 3, 3, 4, 5, 4, 5, 6]
[(1, 1), (2, 2), (3, None)]

from itertools import groupby 
d1={'name':'zhangsan','age':20,'country':'China'}
d2={'name':'wangwu','age':19,'country':'USA'}
d3={'name':'lisi','age':22,'country':'JP'}
d4={'name':'zhaoliu','age':22,'country':'USA'}
d5={'name':'pengqi','age':22,'country':'USA'}
d6={'name':'lijiu','age':22,'country':'China'}
lst=[d1,d2,d3,d4,d5,d6]
lst.sort(key=lambda x:x['country']) # groupby only merges adjacent items, so sort by the same key first
lstg = groupby(lst,key=lambda x:x['country']) 
# Equivalent using operator.itemgetter: lstg = groupby(lst, key=itemgetter('country'))
for key,group in lstg:
    for g in group: # group is an iterator over the consecutive items that share this key
        print(key,g)

China {'name': 'zhangsan', 'age': 20, 'country': 'China'}
China {'name': 'lijiu', 'age': 22, 'country': 'China'}
JP {'name': 'lisi', 'age': 22, 'country': 'JP'}
USA {'name': 'wangwu', 'age': 19, 'country': 'USA'}
USA {'name': 'zhaoliu', 'age': 22, 'country': 'USA'}
USA {'name': 'pengqi', 'age': 22, 'country': 'USA'}

操作符 operator

import operator
# itemgetter(0) builds a callable that returns item 0 of its argument.
a = operator.itemgetter(0)
a([1,2,3])
# The equivalent lambda gives the same result.
a = lambda x:x[0]
a([1,2,3])

1
1

parse库

subprocess

https://zhuanlan.zhihu.com/p/140555017

code snippet

python_package

os

configparser

日期 & 时间 datetime time

正则表达式 re

爬虫 requests

多进程 multiprocessing

高效循环 itertools

操作符 operator

subprocess