Blog: https://jameshoi.github.io/

Time to say goodbye to Yuque. What I'll miss most is the cover image it shows on every article, but writing in Yuque and then converting to PDF comes out terribly, the markdown export keeps every image pointing at Yuque's CDN, and there is no way to batch-export articles.
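For example, an image in the exported markdown keeps pointing at Yuque's CDN, while what a self-hosted blog needs is a local relative path. The URLs below are made up for illustration:

```markdown
<!-- what the Yuque export gives you (CDN-hosted): -->
![image.png](https://cdn.nlark.com/yuque/0/2021/png/1234567/1612345678901-deadbeef.png)

<!-- what the script below rewrites it to (downloaded next to the .md): -->
![image.png](my-post-slug/1612345678901-deadbeef.png)
```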

In the end I wrote a script to handle it myself; I'm posting it here for anyone who comes after.

```python
import json
import os
import time
from urllib.parse import urlparse

import requests

TOKEN = ""  # paste your Yuque personal access token here
NAMESPACE = "jameshoi"
SAVE_PATH = "./source/"
DOWNLOAD_FILE_PATH = "https://jameshoi.github.io/files/"
TIMEOUT = 5


def download_file(url, filename):
    # Retry on network errors until the request succeeds, then write to disk.
    while True:
        try:
            response = requests.get(url, timeout=TIMEOUT)
            break
        except Exception:
            time.sleep(2)
    with open(filename, "wb") as f:
        f.write(response.content)


def get_all_md_info(doc_path):
    # List every doc in a repo through the Yuque v2 API.
    url = "https://www.yuque.com/api/v2/repos/{}/{}/docs".format(NAMESPACE, doc_path)
    headers = {
        "Content-Type": "application/json",
        "X-Auth-Token": TOKEN,
    }
    response = requests.get(url, headers=headers, timeout=TIMEOUT)
    return json.loads(response.content)


def download_md(doc_path, doc_name):
    # Export one doc as markdown; the query flags keep attachment links
    # and disable Yuque-specific markup in the output.
    optional = "attachment=true&latexcode=false&anchor=false&linebreak=false"
    url = "https://www.yuque.com/{}/{}/{}/markdown?{}".format(
        NAMESPACE, doc_path, doc_name, optional
    )
    download_file(url, SAVE_PATH + doc_name + ".md")


def save_img(doc_name, url):
    # Download one image into a folder named after the doc slug.
    filename = urlparse(url).path.split("/")[-1]
    if not os.path.exists(SAVE_PATH + doc_name):
        os.mkdir(SAVE_PATH + doc_name)
    download_file(url, SAVE_PATH + doc_name + "/" + filename)
    return filename


def replace_img_url(doc_name):
    # Find every ![alt](url) image, download it, then rewrite the link
    # so it points at the local copy instead of the Yuque CDN.
    with open(SAVE_PATH + doc_name + ".md", "rb") as f:
        content = f.read().decode()
    start = 0
    url_list = []
    while True:
        head = content.find("![", start)
        mid = content.find("(", head) + 1
        end = content.find(")", mid)
        if head == -1 or mid == 0 or end == -1:
            break
        url = content[mid:end]
        start = end
        filename = save_img(doc_name, url)
        url_list.append([url, doc_name + "/" + filename])
    for url, path in url_list:
        content = content.replace(url, path)
    with open(SAVE_PATH + doc_name + ".md", "wb") as f:
        f.write(content.encode())


def add_md_info(doc_name, title, date):
    # Prepend Hexo-style front matter with the title and creation date.
    data = "---\ntitle: {}\ndate: {}\n---\n".format(title, date)
    with open(SAVE_PATH + doc_name + ".md", "rb+") as f:
        old = f.read()
        f.seek(0)
        f.write(data.encode())
        f.write(old)


def replace_file_url(doc_name):
    # Same scan for plain [name](url) links, but only Yuque attachment
    # URLs are rewritten, pointing them at DOWNLOAD_FILE_PATH.
    with open(SAVE_PATH + doc_name + ".md", "rb") as f:
        content = f.read().decode()
    start = 0
    url_list = []
    while True:
        head = content.find("[", start)
        mid = content.find("(", head) + 1
        end = content.find(")", mid)
        if head == -1 or mid == 0 or end == -1:
            break
        url = content[mid:end]
        start = end
        if not url.startswith("https://www.yuque.com/attachments"):
            continue
        filename = content[head + 1:mid - 2]  # link text between "[" and "]("
        url_list.append([url, DOWNLOAD_FILE_PATH + doc_name + "/" + filename])
    for url, path in url_list:
        content = content.replace(url, path)
    with open(SAVE_PATH + doc_name + ".md", "wb") as f:
        f.write(content.encode())


def yuque_main():
    DOC_PATH = ["writeup", "default"]  # the repo slugs to export
    for doc_path in DOC_PATH:
        all_info = get_all_md_info(doc_path)
        total = len(all_info["data"])
        for i, info in enumerate(all_info["data"], 1):
            doc_name = info["slug"]
            date = info["created_at"]
            title = info["title"]
            print("[{}][{}/{}] downloading {}.md".format(doc_path, i, total, doc_name))
            download_md(doc_path, doc_name)
            replace_img_url(doc_name)         # localize images first
            add_md_info(doc_name, title, date)
            replace_file_url(doc_name)        # then rewrite attachment links


if __name__ == "__main__":
    yuque_main()
```
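To use it: create a personal access token in Yuque's account settings and paste it into `TOKEN`, change `NAMESPACE` and `DOC_PATH` to your own space and repo slugs, then run the script. Every article lands in `./source/` as `slug.md` with its images in a sibling `slug/` folder, and Hexo front matter is prepended from the API metadata, roughly like this (the date is whatever `created_at` the API returns, typically an ISO 8601 timestamp):

```yaml
---
title: Some Article
date: 2021-02-03T04:05:06.000Z
---
```

Note that the script only rewrites attachment links to `DOWNLOAD_FILE_PATH`; it never downloads the attachments themselves, so those files still have to be uploaded to the blog's `/files/` directory by hand.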