import os


def list_file_paths(folder):
    """Return the full paths of the regular files directly inside *folder*.

    Args:
        folder: Path of the directory to scan (not searched recursively).

    Returns:
        A list of ``os.path.join(folder, name)`` paths, sorted by file name.
        Sub-directories are skipped because ``open()`` cannot read them.

    Raises:
        OSError: If *folder* does not exist or cannot be listed.
    """
    candidates = (
        os.path.join(folder, file_name)
        for file_name in sorted(os.listdir(folder))
    )
    return [candidate for candidate in candidates if os.path.isfile(candidate)]


if __name__ == '__main__':
    # Folder to scan. NOTE: with the raw-string prefix the doubled
    # backslashes are kept literally in the path.
    path = r'D:\\Rexel数据\\HTML\\'
    # Visit every file in the folder, e.g. D:\\Rexel数据\\HTML\\AAA196.html
    for every_file in list_file_paths(path):
        with open(every_file, 'rb') as f:
            # Placeholder: process the opened binary file object ``f`` here.
            pass

一 获取多层级目录下的文件

需求:获取多层级目录下的文件名,用字典方式保存一级目录名、二级目录名、三级文件名

一级目录(文件夹):
image.png
二级目录(文件夹):
image.png
三级目录(里面是文件):
image.png

  1. 目录层次:
  2. -AAA046
  3. --left_img
  4. ---20201013141526260_S.jpg
  5. 即:AAA046\left_img\20201013141526260_S.jpg

代码:

  #!/usr/bin/env python
# -*- coding:utf-8 -*-
"""Record which image files each sub-folder of every order-number folder holds.

Expected layout: <root>/<order number>/<image category>/<image file>.
"""
import os


def build_img_dic(order_dir_path, order_num):
    """Build the image-relation dict for one first-level (order number) folder.

    Args:
        order_dir_path: Path of the first-level folder to inspect.
        order_num: Value stored under the ``'order_num'`` key (the folder name).

    Returns:
        A dict holding ``'order_num'`` plus one entry per second-level folder,
        mapping the folder name to the sorted list of names found inside it.
        Entries of *order_dir_path* that are not directories are ignored.

    Raises:
        OSError: If *order_dir_path* or one of its sub-folders cannot be listed.
    """
    img_dic = {'order_num': order_num}
    # Sorted so the key order does not depend on the file system.
    for dir_2 in sorted(os.listdir(order_dir_path)):
        # Second-level folder path, e.g. <root>/AAA046/brand_authorization_img
        children_1_path = os.path.join(order_dir_path, dir_2)
        if not os.path.isdir(children_1_path):
            # A stray file at this level would make os.listdir() raise.
            continue
        img_dic[dir_2] = sorted(os.listdir(children_1_path))
    return img_dic


if __name__ == "__main__":
    path = r"D:\Rexel数据\TEST"
    # NOTE: despite the .csv extension, each line written is the str() of a
    # dict, not comma-separated values.
    output_path = r'D:\Rexel数据\img_relation.csv'
    # Open the output once (append mode) instead of once per order folder.
    with open(output_path, 'a', encoding='utf-8') as f:
        for dir_1 in sorted(os.listdir(path)):
            # First-level folder path, e.g. D:\Rexel数据\TEST\AAA046
            children_path = os.path.join(path, dir_1)
            if not os.path.isdir(children_path):
                continue
            f.write(str(build_img_dic(children_path, dir_1)) + '\n')
  • 每访问一个一级目录就创建一个img_dic字典,每访问一个二级目录就将该二级目录名及其下面的文件名list加入字典中,这样每个订货编码都有一条对应的图片信息记录

最终字典格式:

  1. # Final dictionary format: one such dict is written per order number
  2. {
  3. 'order_num': 'AAA198',
  4. 'brand_authorization_img': ['MRa%2BzdNFExKiYCWTTcdTwtkz5M35izotiu0aDZXJu6g%3D%3AaWg0OHRzZGxnUFBZU2FBUg%3D%3D.jpg'],
  5. 'left_img': ['20180921104125341_S.jpg'],
  6. 'notices_img': ['rGA2%2FPrSYfdSS%2FwYqakgWkUw5d4I6FmX25DFCzFskVs%3D%3AaWg0OHRzZGxnUFBZU2FBUg%3D%3D.jpg'],
  7. 'overview_img': ['PlVPoiyDFWqlhqSoWUiGmKPjtuEEKtdZ4IgXu65jpnc%3D%3AaWg0OHRzZGxnUFBZU2FBUg%3D%3D.jpg'],
  8. 'pro_applications_img': ['PlVPoiyDFWqlhqSoWUiGmDW8WXdhslGnXr06XYUzPVg%3D%3AaWg0OHRzZGxnUFBZU2FBUg%3D%3D.jpg'],
  9. 'pro_description_img': ['PlVPoiyDFWqlhqSoWUiGmBqgkw0kq46DNgAE81wY%2F4c%3D%3AaWg0OHRzZGxnUFBZU2FBUg%3D%3D.jpg'],
  10. 'pro_detail_img': ['PlVPoiyDFWqlhqSoWUiGmNzmn%2F3h4YeMJMMWF8dcWe0%3D%3AaWg0OHRzZGxnUFBZU2FBUg%3D%3D.jpg'],
  11. 'pro_selection_data_img': ['rGA2%2FPrSYfdSS%2FwYqakgWilBsJbWjzANWbCirUPDc5c%3D%3AaWg0OHRzZGxnUFBZU2FBUg%3D%3D.jpg', 'rGA2%2FPrSYfdSS%2FwYqakgWu225FqvYIYphSdBBuQJyOY%3D%3AaWg0OHRzZGxnUFBZU2FBUg%3D%3D.jpg'],
  12. 'pro_specifications_img': ['PlVPoiyDFWqlhqSoWUiGmItUOSmbhtHKgB75%2BLRBWJQ%3D%3AaWg0OHRzZGxnUFBZU2FBUg%3D%3D.jpg']
  13. }