闲着无聊,自己又在自学python。之前分享过搭建hugo的过程,因为国内连接github速度有点慢,所以当时就把最新的releases下载下来,并上传到coding当作备份。当时写的是每周去下载一次,但是自己太懒不想动,于是……更新了好几个版本自己也没动,所以就想着用python帮忙了。
功能介绍
1.通过github的api进行请求,获取最新的内容,并与本地进行对比。
2.多线程下载同一个文件,提高速度,最高5线程下载,并自动重连。
3.文件下载完后,启动另一个线程去git上传,提高时间利用率。
迷惑行为
1.在本地的windows10上coding不初始化也可以正常使用,但是服务器上不行。而且也不会进行任何报错。所以使用的时候必须先进行初始化远程仓库。
2.也需要执行下面内容,原因与上述相同。(xxxx替换为自己的名称和邮箱)
git config --global user.name "xxxx"
git config --global user.email "xxxx"
3.自己才开始学python,对于线程的守护还没有好办法,只能一直用循环和全局变量来判断线程是否结束,仍需要改进。
代码
#coding=utf-8
import threading
import time
import os
import requests
from sys import stdout
import sys
import git
from apscheduler.schedulers.blocking import BlockingScheduler
# Shared upload-status flags, one entry per started upload: copy.main appends 1
# when it launches an upload, push() sets its entry to 0 when it finishes, and
# the top-level main() waits until the entries sum to 0.
gitok=[]
class copy:
    """Download one file with several HTTP range requests, then start its upload.

    The file is pre-allocated under ``out/``, filled in place by up to five
    worker threads (one byte range each), moved to ``to_path/file_path`` and
    finally handed to ``push`` on a background thread.

    Attributes:
        url: Download URL of the file.
        file_name: Name of the file on disk.
        file_size: Expected size in bytes.
        to_path: Root of the local git working tree.
        file_path: Directory of the file relative to ``to_path`` (ends with '/').
        file_paths: ``to_path + '/' + file_path``, the destination directory.
        lock: Lock passed through to ``push`` to serialise uploads.
        i: Index of this download's flag inside the global ``gitok`` list.
        thread_num: Number of worker threads used (recomputed in ``main``).
        sss: Bytes written so far, one counter per worker.
        jincheng: ``[threads, flags]``; a flag is 1 while the worker is still
            running and 0 once its range is complete.
    """

    # Bytes pulled from the response stream per iteration.
    CHUNK_SIZE = 8192
    # Seconds to wait before re-issuing a failed range request.
    RETRY_DELAY = 1

    def __init__(self, url, file_name, file_size, to_path, file_path, lock, ii):
        self.url = url
        self.file_paths = to_path + '/' + file_path
        self.file_path = file_path
        self.file_name = file_name
        self.jincheng = [[], []]
        self.file_size = file_size
        self.sss = []
        self.thread_num = 5
        self.to_path = to_path
        self.lock = lock
        self.i = ii
        # Protects growth of the per-worker lists in _ensure_slots.
        self._state_lock = threading.Lock()

    @staticmethod
    def _split_ranges(file_size, thread_num):
        """Return ``thread_num`` inclusive ``(first, last)`` byte ranges covering the file.

        HTTP Range bounds are inclusive, so consecutive ranges must not share a
        byte and the final range ends at ``file_size - 1``. For an empty file
        the single range is ``(0, -1)``, i.e. nothing to fetch.
        """
        part = file_size // thread_num
        ranges = []
        for index in range(thread_num):
            first = part * index
            if index == thread_num - 1:
                last = file_size - 1  # last part also takes the remainder
            else:
                last = first + part - 1
            ranges.append((first, last))
        return ranges

    def _ensure_slots(self, count):
        """Grow the per-worker bookkeeping lists so that indexes ``0..count-1`` exist."""
        with self._state_lock:
            while len(self.sss) < count:
                self.jincheng[0].append(None)
                self.jincheng[1].append(1)
                self.sss.append(0)

    def Handler(self, start, end, url, filename, file_size, id):
        """Download bytes ``start..end`` (inclusive) of ``url`` into ``filename``.

        Runs on a worker thread. Failed requests are retried indefinitely,
        resuming from the first byte that has not been written yet.

        Args:
            start: Offset of the first byte to fetch.
            end: Offset of the last byte to fetch (inclusive).
            url: Download URL.
            filename: Pre-allocated file that is written in place.
            file_size: Total file size (unused; kept for interface compatibility).
            id: Index of this worker's slot in ``sss`` / ``jincheng``.
        """
        self._ensure_slots(id + 1)
        self.jincheng[0][id] = threading.current_thread()
        self.jincheng[1][id] = 1
        headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36 Edg/80.0.361.111"}
        # Loop until the whole range is on disk; an early end of stream simply
        # triggers another request for the remaining bytes.
        while start <= end:
            headers['Range'] = 'bytes=%d-%d' % (start, end)
            try:
                with requests.get(url, headers=headers, stream=True, timeout=5) as r:
                    # Never write an HTTP error page into the target file.
                    r.raise_for_status()
                    with open(filename, "r+b") as fp:
                        fp.seek(start)
                        for chunk in r.iter_content(self.CHUNK_SIZE):
                            fp.write(chunk)
                            # Count what was really received: a chunk may be
                            # shorter than the requested chunk size.
                            start += len(chunk)
                            self.sss[id] += len(chunk)
            except (requests.RequestException, OSError):
                # Back off briefly instead of hammering the server in a hot loop.
                time.sleep(self.RETRY_DELAY)
        self.jincheng[1][id] = 0

    def main(self):
        """Download the file, move it into the repository and start its upload thread."""
        out = 'out/'
        os.makedirs(out, exist_ok=True)
        temp_path = out + "未完成" + self.file_name
        with open(temp_path, "wb") as fp:
            fp.truncate(self.file_size)
        # One worker per 100 KiB, capped at five.
        self.thread_num = min(self.file_size // 102400 + 1, 5)
        # Create every worker slot BEFORE starting any thread, so the completion
        # check below can never observe an empty flag list and finish early.
        self.sss = []
        self.jincheng = [[], []]
        self._ensure_slots(self.thread_num)
        ranges = self._split_ranges(self.file_size, self.thread_num)
        for index, (start, end) in enumerate(ranges):
            t = threading.Thread(target=self.Handler, kwargs={
                'start': start, 'end': end, 'url': self.url,
                'filename': temp_path, 'file_size': self.file_size,
                'id': index})
            t.daemon = True
            t.start()
        while True:
            if self.file_size:
                percent = sum(self.sss) / self.file_size * 100
            else:
                percent = 100.0  # empty file: nothing to download
            stdout.write("下载进度%.2f %d\n" % (percent, sum(self.jincheng[1])))
            if 1 not in self.jincheng[1]:
                break
            time.sleep(5)
        # os.replace overwrites an existing destination on every platform.
        os.replace(temp_path, self.file_paths + self.file_name)
        # Register the pending upload before the thread starts, so push() can
        # always find its flag when it clears gitok[id].
        gitok.append(1)
        x = threading.Thread(target=push, kwargs={
            'to_path': self.to_path,
            'file_path': self.file_path + self.file_name,
            'id': self.i, 'lock': self.lock})
        x.daemon = True
        x.start()
def push(to_path, file_path, id, lock):
    """Stage, commit and push one downloaded file, then clear its ``gitok`` flag.

    Intended to run on a background thread; ``lock`` serialises all uploads.

    Args:
        to_path: Path of the local git working tree.
        file_path: Path of the file relative to ``to_path``; also used as the
            commit message.
        id: Index of this upload's flag in the global ``gitok`` list. The
            upload with id 0 stages everything (``git add -A``).
        lock: Lock shared by all uploads.

    Raises:
        Whatever GitPython raises while opening the repository, committing or
        pushing. The lock is released and the flag cleared in that case too,
        so neither later uploads nor the waiting main loop block forever.
    """
    with lock:
        print("%s锁定上传中" % file_path)
        try:
            repo = git.Repo(to_path)
            if id == 0:
                repo.git.add('-A')
            else:
                try:
                    repo.git.add(file_path)
                except Exception:
                    # Best effort: if the single file cannot be staged,
                    # fall back to staging everything.
                    repo.git.add('-A')
            repo.index.commit(file_path)
            repo.remote().push()
        finally:
            # Mark this upload as finished even on failure; otherwise the
            # guard loop in main() would wait on it forever.
            gitok[id] = 0
        print("%s解锁上传完毕" % file_path)
def main():
    """Mirror the assets of the latest Hugo release into the local git repository.

    Clones the repository (or pulls it when it already exists), asks the
    GitHub API for the latest release, downloads every asset that is not yet
    present under ``./hugo/<tag>/`` and finally waits until all background
    uploads started for those downloads have finished.
    """
    # Upload ids restart at 0 on every run, so the shared flag list must start
    # empty as well; stale entries would misalign the indices and keep the
    # guard loop below from ever terminating.
    gitok.clear()
    i = 0
    lock = threading.Lock()
    # Token-name:token-secret credentials are used here; replace with any
    # other authentication scheme as needed.
    key = r'xxxx:xxxxxx'
    repo_url = ('https://%s@e.coding.net/halfye/hugo.git' % key)
    to_path = './hugo'
    if not os.path.exists(to_path):
        git.Repo.clone_from(repo_url, to_path)
    else:
        try:
            git.Repo(to_path).remote().pull()
        except Exception as err:
            # Pulling is best effort; report the problem instead of hiding it.
            print("拉取远程仓库失败:", err)
    url = r'https://api.github.com/repos/gohugoio/hugo/releases/latest'
    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36 Edg/80.0.361.111"}
    while True:
        try:
            strhtml = requests.get(url, headers=headers, timeout=5)
            # An error response (e.g. rate limiting) is still JSON but has no
            # 'tag_name'; treat it as a failed attempt and retry.
            strhtml.raise_for_status()
            dic = strhtml.json()
            break
        except (requests.RequestException, ValueError):
            print("失败一次")
            time.sleep(20)
    dirs = to_path + '/' + dic['tag_name'] + '/'
    os.makedirs(dirs, exist_ok=True)
    for it in dic["assets"]:
        file_size = it["size"]
        url1 = it["browser_download_url"]
        file_name = url1.split('/')[-1]
        if not os.path.exists(dirs + file_name):
            # Optionally rewrite the host to your own reverse proxy for speed:
            #url1=url1.replace("https://github.com/", "https://xxx.xxxx.com/", 1)
            print(url1, "将要下载")
            cc = copy(url1, file_name, file_size, to_path, dic['tag_name'] + '/', lock, i)
            cc.main()
            i += 1
        else:
            print(file_name, '已经存在')
    if dic["assets"]:
        # Guard the git uploads: every pending upload holds a 1 in gitok.
        while sum(gitok) != 0:
            time.sleep(5)
            print("正在守护")
        print('守护结束')
# Run one synchronisation right away; the scheduler below repeats it daily.
main()

if __name__ == '__main__':
    daily = BlockingScheduler()
    # Cron trigger: every day of the week at 12:00:00.
    daily.add_job(main, 'cron', day_of_week='0-6', hour=12, minute=0, second=0)
    try:
        daily.start()
    except (KeyboardInterrupt, SystemExit):
        # Ctrl-C or interpreter shutdown is the normal way to stop the
        # blocking scheduler, so exit quietly.
        pass
挺巧的,看不懂。。。。
有太多bug了 懒得修复