IO编程 - python之文件操作 - 《Python 基础教程》

r模式
w模式
a模式
tell()查询文件中光标位置
seek()光标定位
flush 同步将数据从缓存转移到磁盘
truncate截断
with打开多个文件
StringIO
文件常用读写总结
内置模块fileinput读取文件

r模式

read()：一次性读取全部内容；传入参数时读取指定数量的字符
readline()：每次调用读取一行（首次调用读取第一行，再次调用读取下一行）
readlines()：读取内容以列表的形式输出

w模式

进行操作前，文件中的数据会被清空
write()：将字符串写入文件，返回写入的字符长度
writelines()：将字符串或字符串序列写入文件，无返回

a模式

追加写入

tell()查询文件中光标位置

seek()光标定位

# Demonstrates tell(), seek() and truncate() on a text file.
# NOTE: the byte counts in the comments assume a 2-bytes-per-character
# encoding such as GBK; in UTF-8 a Chinese character takes 3 bytes.
# Each file is opened in a `with` block so it is closed even if a call fails.
with open('file', 'r') as f:
    print(f.read(6))  # read(n) in text mode counts characters: 6 characters
    print(f.tell())   # position is byte based: 12 for six GBK Chinese characters

with open('file', 'r') as f:
    # seek() takes a byte offset. In text mode only 0 or a value previously
    # returned by tell() is guaranteed to be valid; an arbitrary offset may
    # land in the middle of a multi-byte character.
    f.seek(6)
    print(f.tell())

with open('file', 'a') as f:
    print(f.tell())   # append mode starts at the end of the file
    f.write('你好 世界')
    print(f.tell())   # 9 bytes further on in GBK (four characters plus one space)

with open('file', 'a', encoding='utf-8') as f:
    # truncate(size) works in bytes whatever the text encoding is: the file
    # keeps its first 6 bytes, the rest is discarded, and the new size is
    # returned (and printed here).
    print(f.truncate(6))

flush 同步将数据从缓存转移到磁盘

实现进度条功能：

import sys
import time

# Progress bar: emit one '*' every 0.2 seconds. flush() pushes the buffered
# character to the terminal straight away, so each star appears as it is
# written rather than all at once when the buffer is finally emptied.
for _ in range(40):
    sys.stdout.write('*')
    sys.stdout.flush()
    time.sleep(0.2)
下面代码也能够实现相同的功能
import time

# Same progress bar using print(): end='' keeps the stars on one line and
# flush=True makes print() flush the stream after every call.
for _ in range(40):
    print('*', end='', flush=True)
    time.sleep(0.2)

truncate截断

# truncate(size) resizes the file to `size` bytes; everything after that
# point is discarded. The files are opened in `with` blocks so they are
# closed, and the change is flushed, as soon as the demonstration is done.
with open('file', 'a') as f:
    # Keeps the first 6 bytes: 6 ASCII letters, or 3 Chinese characters in a
    # 2-bytes-per-character encoding such as GBK.
    f.truncate(6)

# --------------
with open('7_17_test.py', 'r+') as ff:
    ff.truncate(10)   # the size is counted in bytes, not characters
    print(ff.read())  # the file was just opened, so reading starts at offset 0
# Example output:
# # -*- codi

with打开多个文件

# One `with` statement can manage several files; both are closed on exit.
# Copies test.txt to test2.txt with 'yang' appended to every line.
with open('test.txt', 'r', encoding='utf-8') as f1, open('test2.txt', 'w', encoding='utf-8') as f2:
    for line in f1:
        line = ''.join([line.strip(), 'yang'])
        print(line)
        # Write inside the loop so every processed line is saved. strip()
        # removed the line ending, so add one back to keep one record per line.
        f2.write(line + '\n')

StringIO

读写文件，一般在磁盘上读写文件的功能都是由操作系统提供的，读写文件是请求操作系统打开一个文件对象（文件描述符），然后通过操作系统提供的接口从文件对象中读取数据(读文件)，或者把数据写入这个文件对象（写文件）；
当然读写文件也可以在内存中读写，StringIO就是在内存中读写str。

getvalue()：获得写入后的str；

from io import StringIO

# StringIO is an in-memory text buffer with the same interface as a text file.
f = StringIO()
f.write("hello")
print(f.getvalue())    # hello
# Writing leaves the position at the end of the buffer, so rewind before
# reading; otherwise readline() returns '' immediately and the loop below
# never prints anything.
f.seek(0)
while True:
    s = f.readline()
    if s == '':        # '' marks the end of the buffer
        break
    print(s.strip())   # hello
# The read loop stopped at the end of the buffer, so this write appends.
f.write("中文")
print(f.getvalue())    # hello中文

文件常用读写总结

#!/usr/bin/env python
# -*- encoding: utf-8 -*-
"""
# @File    :   file_test.py
# @Time    :   2022/01/12 23:43:54
# @Author  :   wangshunzhe
读写文件处理总结
"""
from logzero import logger
"""
read():读取文件全部内容，加上参数可指定读取字符
readline():读取文件的一行
readlines()：读取文件所有行到内存中
"""
"""read一次性全文本读取文件"""
# Load the complete text of the file with a single read() call and log it.
with open("wanshunzhe_script/tests/file/test.txt", "r", encoding="utf-8") as source:
    logger.info(source.read())
"""read按字节读取文件:
适合于分批处理文本信息，每次批量读入，批量处理，不会对内存造成较大的压力"""
# read(size) in text mode returns at most `size` characters per call, so the
# file is processed batch by batch instead of being held in memory at once.
with open("wanshunzhe_script/tests/file/test.txt", "r", encoding="utf-8") as f:
    # iter(callable, sentinel) keeps calling f.read(100) until it returns "",
    # which read() does at end of file. The empty chunk is therefore never
    # logged, and the read/log pair is written only once.
    for index, results in enumerate(iter(lambda: f.read(100), "")):
        logger.info(results)
        logger.info(f"index = {index}")
"""readliines:
适合处理以行为分割特点的文本，这种处理方式需要一次性把文件所有内容读取到内存中，
并以列表形式返回每一行；文件很大时会占用大量内存，此时更适合直接迭代文件对象逐行读取"""
# readlines() returns a list with one string per line, so every line of the
# file is held in memory at the same time.
with open("wanshunzhe_script/tests/file/test.txt", "r", encoding="utf-8") as source:
    logger.info(source.readlines())
"""readline:每次只读取一行"""
# readline() returns one line per call and "" once the end of the file is reached.
with open("wanshunzhe_script/tests/file/test.txt", "r", encoding="utf-8") as f:
    # iter(f.readline, "") stops at the "" sentinel, so the empty end-of-file
    # result is not logged as if it were a line of the file.
    for res in iter(f.readline, ""):
        logger.info(res)
"""
write:向文件中写入一个字符串（二进制模式下为字节串），返回写入的长度
writelines:将字符串或字符串序列写入文件，无返回
w+:打开一文件用于读写
"""
# Named `words` rather than `list` so the built-in list type is not shadowed.
words = ["中午", "早上", "晚上"]
# "w+" truncates the file and opens it for both writing and reading. The
# encoding is explicit so the Chinese text is stored the same way on every
# platform, matching the other examples that open files as UTF-8.
with open("wanshunzhe_script/tests/file/output.txt", "w+", encoding="utf-8") as out_file:
    # writelines() writes the strings back to back; it adds no line separators.
    out_file.writelines(words)
    # After writing, the position is at the end of the file. Rewind to the
    # start first, otherwise the loop below would read nothing.
    out_file.seek(0)
    for line in out_file:
        # Logs the data that was just written.
        logger.info(line)

内置模块fileinput读取文件

最适合用来读取文件！！ 重点介绍的方法： fileinput.filename(): 返回当前读取的文件名 fileinput.lineno(): 返回已被读取的累计行数，第一行被读取之前返回为0 fileinput.filelineno(): 返回当前文件内的行号（批量打开多个文件时即每个文件的真实行号） fileinput.isfirstline(): 判断读取的是否是文件的第一行 fileinput.close(): 关闭打开文件

import fileinput
# Read a single file. `files` is given as a one-element tuple, and the
# context manager closes the input when the block ends.
single_file = ('./wanshunzhe_script/tests/base/magic_module.py', )
with fileinput.input(files=single_file) as stream:
    for text in stream:
        print(f'{fileinput.filename()} 第 {fileinput.lineno()} 行: {text}')
# Read several files in one pass.
# fileinput.lineno():     cumulative line number across all files read so far
# fileinput.filelineno(): line number within the file currently being read
source_files = (
    './wanshunzhe_script/tests/base/magic_module.py',
    'wanshunzhe_script/tests/base/list_test.py',
)
with fileinput.input(files=source_files) as stream:
    for text in stream:
        print(f'{fileinput.filename()} 第 {fileinput.filelineno()} 行: {text}')
# Combine fileinput with glob to read every matching file in one pass.
import glob

log_files = glob.glob("./wanshunzhe_script/tests/base/*.log")
# When given an empty sequence of names, fileinput reads standard input
# instead, so only start reading if the pattern matched at least one file.
if log_files:
    # The context manager closes the input even if the loop body raises.
    with fileinput.input(log_files) as stream:
        for line in stream:
            if fileinput.isfirstline():
                # Banner printed once at the start of each file.
                print(f'{"#" * 20} Reading {fileinput.filename()} {"#" * 20}')
            # lineno() is cumulative across all files read so far.
            print(str(fileinput.lineno()) + ': ' + line.upper(), end='')
# In-place rewriting: with inplace=True, standard output is redirected into
# the file being read, so everything printed inside the loop replaces the
# file's content.
import glob

print(f"{'*' * 20} Start Task {'*' * 20}")
log_files = glob.glob("./wanshunzhe_script/tests/base/*.log")
# When given an empty sequence of names, fileinput reads standard input
# instead, so only start rewriting if the pattern matched at least one file.
if log_files:
    # Closing through the context manager restores the real standard output
    # even if the loop body raises part-way through a file.
    with fileinput.input(log_files, inplace=True) as stream:
        for line in stream:
            if fileinput.isfirstline():
                # Banner written as the new first line of each file.
                print(f'{"#" * 20} Reading {fileinput.filename()} {"#" * 20}')
            # lineno() is cumulative across all files read so far.
            print(str(fileinput.lineno()) + ': ' + line.upper(), end='')
print(f"{'*' * 20} End Task {'*' * 20}")
"""
自定义对象读取方法
    使用fileinput.input() 中的openhook参数
"""
def online_open(url, mode):
    """Download *url* and return a local text file object for fileinput.

    Intended as the ``openhook`` argument of ``fileinput.input()``, which
    calls the hook with a file name and a mode.

    Args:
        url: Address of the remote text resource. Its last path segment is
            used as the name of the local copy, which is created in the
            current working directory.
        mode: Mode requested by fileinput. Accepted for hook compatibility
            but not used; the local copy is always opened for reading.

    Returns:
        An open text file positioned at the start of the downloaded copy.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an error status.
        UnicodeDecodeError: If the response body is not valid UTF-8.
    """
    import requests

    # A timeout keeps the hook from hanging forever on an unresponsive server.
    response = requests.get(url, timeout=10)
    # Fail loudly instead of saving an HTTP error page as if it were the file.
    response.raise_for_status()
    # Ignore a trailing slash so the derived file name is never empty.
    filename = url.rstrip("/").split("/")[-1]
    print(f'filename: {filename}')
    # Use one explicit encoding for decoding, writing and re-reading so the
    # result does not depend on the platform's default encoding.
    with open(filename, 'w', encoding='utf-8') as f1:
        f1.write(response.content.decode('utf-8'))
    # The caller receives the open file and is responsible for closing it;
    # fileinput does so when it moves on from the file.
    return open(filename, "r", encoding='utf-8')
# Pass the custom hook through `openhook`: fileinput calls it for each entry
# in `files` instead of opening the name itself, then iterates the file
# object the hook returns.
file_url = "https://www.csdn.net/robots.txt"
with fileinput.input(files=(file_url, ), openhook=online_open) as remote:
    for row in remote:
        # Each row already ends with its own newline.
        print(row, end="")
# User-agent: * 
# Disallow: /scripts 
# Disallow: /public 
# Disallow: /css/ 
# Disallow: /images/ 
# Disallow: /content/ 
# Disallow: /ui/ 
# Disallow: /js/ 
# Disallow: /scripts/ 
# Disallow: /article_preview.html* 
# Disallow: /tag/
# Disallow: /*?*
# Disallow: /link/
# Sitemap: https://www.csdn.net/sitemap-aggpage-index.xml
# Sitemap: https://www.csdn.net/article/sitemap.txt