监控github项目最新releases

闲着无聊，自己又在自学 Python。之前分享过搭建 hugo 的过程，因为国内连接 GitHub 的速度有点慢，所以当时就把最新的 releases 下载下来，并上传到 coding 当作备份。当时写的是每周去下载一次，但是自己太懒不想动，结果更新了好几个版本自己也没动，于是就想着用 Python 来帮忙了。

功能介绍

1.通过 GitHub 的 API 进行请求，获取最新的内容，并与本地进行对比。
2.多线程下载同一个文件，提高速度，最高 5 线程下载，并自动重连。
3.文件下载完后，启动另一个线程去git上传，提高时间利用率。

迷惑行为

1.在本地的 Windows 10 上，coding 远程仓库不初始化也可以正常使用，但是在服务器上不行，而且不会有任何报错。所以使用之前必须先初始化远程仓库。

2.也需要执行下面的内容，原因与上述相同。（xxxx 替换为自己的名称和邮箱）


git config --global user.name  "xxxx"
git config --global user.email  "xxxx"

3.自己才刚开始学 Python，对线程的守护（等待线程结束）还不太会处理，只能一直用循环和全局变量来判断线程是否结束，仍需要改进。

代码


#coding=utf-8
import threading
import time
import os
import requests
from sys import stdout
import sys
import git
from apscheduler.schedulers.blocking import BlockingScheduler 
# Upload-status flags shared by copy.main, push and main: copy.main appends a 1
# for every upload it queues, push sets its entry back to 0 when it is done,
# and main waits until the sum of all entries is 0.
gitok=[]
class copy:
    """Download one release asset with several ranged requests, then queue its upload.

    The asset is written to ``out/未完成<file_name>`` while it is incomplete.
    Once every byte range has been received, the file is moved to
    ``<to_path>/<file_path><file_name>`` and a ``push`` thread is started for it.
    """

    # Number of bytes requested from the response stream per read.
    CHUNK_SIZE = 65536
    # Upper bound on concurrent range requests for a single file.
    MAX_THREADS = 5

    def __init__(self,url,file_name,file_size,to_path,file_path,lock,ii):
        """Store the download parameters; nothing is fetched until ``main`` runs.

        Args:
            url: Download URL of the asset.
            file_name: File name of the asset.
            file_size: Size of the asset in bytes.
            to_path: Path of the local git working tree.
            file_path: Directory inside ``to_path`` (with trailing '/') that
                receives the finished file.
            lock: Lock handed on to ``push`` to serialise uploads.
            ii: Index of this download; handed on to ``push`` as ``id``.
        """
        self.url=url
        self.file_paths=to_path+'/'+file_path
        self.file_path=file_path
        self.file_name=file_name
        # [worker threads, per-worker flag: 1 = still downloading, 0 = finished]
        self.jincheng=[[],[]]
        self.file_size=file_size
        # Bytes written so far by each worker, indexed by worker id.
        self.sss=[]
        self.thread_num=5
        self.to_path=to_path
        self.lock=lock
        self.i=ii

    @staticmethod
    def _byte_ranges(file_size, thread_num):
        """Split ``file_size`` bytes into contiguous inclusive ``(start, end)`` ranges.

        HTTP Range offsets are inclusive on both ends, so every range ends one
        byte before the next one starts and the last range ends at
        ``file_size - 1``. Returns an empty list for an empty file.
        """
        if file_size <= 0:
            return []
        # Never create more ranges than there are bytes.
        thread_num = max(1, min(thread_num, file_size))
        part = file_size // thread_num
        ranges = []
        for n in range(thread_num):
            start = part * n
            # The last range also takes the remainder of the integer division.
            end = file_size - 1 if n == thread_num - 1 else start + part - 1
            ranges.append((start, end))
        return ranges

    def _report_progress(self):
        """Write the completed percentage and the number of busy workers to stdout."""
        # An empty file has nothing to download; avoid dividing by zero.
        percent = sum(self.sss)/self.file_size*100 if self.file_size else 100.0
        stdout.write("下载进度%.2f %d\n"%(percent,sum(self.jincheng[1])))

    def Handler(self,start, end, url, filename,file_size,id):
        """Worker: download bytes ``start``..``end`` (inclusive) of ``url`` into ``filename``.

        The request is retried until the whole range has been written; every
        retry resumes at the first byte that is still missing. Slot ``id`` of
        ``self.sss`` and ``self.jincheng[1]`` must already exist (``main``
        allocates them before starting the workers).

        Args:
            start: Offset of the first byte to fetch.
            end: Offset of the last byte to fetch (clamped to ``file_size - 1``).
            url: Download URL.
            filename: Pre-allocated local file that is written in place.
            file_size: Total size of the remote file in bytes.
            id: Index of this worker's progress/status slots.
        """
        end = min(end, file_size - 1)
        headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36 Edg/80.0.361.111"}
        while start <= end:
            headers['Range'] = 'bytes=%d-%d' % (start, end)
            try:
                r = requests.get(url, headers=headers, stream=True,timeout=5)
                try:
                    # Anything but 206 means the body is not the requested
                    # slice (error page, or the whole file); never write it.
                    if r.status_code != 206:
                        raise IOError("unexpected HTTP status %d for a range request" % r.status_code)
                    with open(filename, "r+b") as fp:
                        fp.seek(start)
                        for chunk in r.iter_content(self.CHUNK_SIZE):
                            # Never write past the end of this worker's range.
                            chunk = chunk[:end - start + 1]
                            if not chunk:
                                break
                            fp.write(chunk)
                            # Advance by what was really received so that a
                            # retry resumes at the right offset.
                            start+=len(chunk)
                            self.sss[id]+=len(chunk)
                finally:
                    r.close()
                if start <= end:
                    raise IOError("connection closed before the range was complete")
            except Exception as err:
                # Thread boundary: keep the worker alive, report and retry.
                stdout.write("线程%d下载出错，重试中: %s\n"%(id,err))
                time.sleep(1)
        self.jincheng[1][id]=0

    def main(self):
        """Download the file, move it into the working tree and start its upload.

        Blocks until every range worker has finished, printing progress every
        five seconds, then starts ``push`` in a daemon thread and returns
        without waiting for the upload.
        """
        global gitok
        out='out/'
        os.makedirs(out, exist_ok=True)
        tmp_name=out+"未完成"+self.file_name
        # Pre-allocate the file so that each worker can write at its own offset.
        with open(tmp_name, "wb") as fp:
            fp.truncate(self.file_size)

        # One worker per 100 KiB, capped at MAX_THREADS.
        self.thread_num=min(self.MAX_THREADS,(self.file_size//102400)+1)
        ranges=self._byte_ranges(self.file_size,self.thread_num)

        # Allocate all slots before any worker runs, so the completion check
        # below can never observe a partially registered set of workers.
        self.sss=[0]*len(ranges)
        self.jincheng=[[],[1]*len(ranges)]
        for idx,(start,end) in enumerate(ranges):
            t = threading.Thread(target=self.Handler, kwargs={'start': start, 'end': end, 'url': self.url, 'filename': tmp_name,'file_size':self.file_size,'id':idx}, daemon=True)
            self.jincheng[0].append(t)
            t.start()

        while 1 in self.jincheng[1]:
            self._report_progress()
            time.sleep(5)
        self._report_progress()

        # os.replace overwrites an existing target in one step.
        os.replace(tmp_name,self.file_paths+self.file_name)
        # Register the pending upload before the thread that clears it can run.
        gitok.append(1)
        x = threading.Thread(target=push,kwargs={'to_path':self.to_path,'file_path':self.file_path+self.file_name,'id':self.i,'lock':self.lock}, daemon=True)
        self.i+=1
        x.start()

        
def push(to_path,file_path,id,lock):
    """Stage, commit and push one downloaded file, one upload at a time.

    The lock is always released and ``gitok[id]`` is always reset to 0, even
    when a git operation fails, so a failed upload can neither block later
    uploads nor keep the caller waiting forever. The error itself still
    propagates out of the thread.

    Args:
        to_path: Path of the local git working tree.
        file_path: Path of the file relative to ``to_path``; also used as the
            commit message.
        id: Index of this upload in ``gitok``; upload 0 stages everything.
        lock: Lock that serialises uploads across threads.
    """
    global gitok
    with lock:
        print("%s锁定上传中"%file_path)
        try:
            repo=git.Repo(to_path)
            if id == 0:
                repo.git.add('-A')
            else:
                try:
                    repo.git.add(file_path)
                except Exception:
                    # Best effort: fall back to staging the whole tree.
                    repo.git.add('-A')
            repo.index.commit(file_path)
            repo.remote().push()
        finally:
            gitok[id]=0
        print("%s解锁上传完毕"%file_path)
    
def _fetch_latest_release(url):
    """Return the decoded latest-release JSON, retrying every 20 s until it succeeds."""
    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36 Edg/80.0.361.111"}
    while True:
        try:
            strhtml=requests.get(url,headers=headers, stream=True,timeout=5)
            # An error response (e.g. rate limiting) is JSON too, but has no
            # 'tag_name'/'assets'; treat it as a failed attempt.
            strhtml.raise_for_status()
            return strhtml.json()
        except (requests.RequestException, ValueError):
            print("失败一次")
            time.sleep(20)


def _wait_for_uploads():
    """Block until every queued upload has cleared its flag in ``gitok``."""
    while sum(gitok) != 0:
        time.sleep(5)
        print("正在守护")
    print('守护结束')


def main():
    """Mirror the assets of the latest hugo release into the local git repository.

    Clones the repository (or pulls it if it already exists), asks the GitHub
    API for the latest release, downloads every asset that is not present
    under ``./hugo/<tag_name>/`` and waits until all uploads have finished.
    """
    global gitok
    # Upload indices restart at 0 on every run, so the flags must restart too;
    # stale entries from a previous run would keep the wait below from ending.
    gitok.clear()
    i=0
    lock = threading.Lock()
    # The repository is accessed with a token / token-secret pair embedded in
    # the URL; replace this with another authentication method if preferred.
    key=r'xxxx:xxxxxx'
    repo_url=('https://%s@e.coding.net/halfye/hugo.git'%key)
    to_path='./hugo'
    if not os.path.exists(to_path):
        git.Repo.clone_from(repo_url, to_path)
    else:
        try:
            git.Repo(to_path).remote().pull()
        except Exception as err:
            # Best effort: keep working with the local state, but say so.
            print("拉取远程仓库失败: %s"%err)

    dic=_fetch_latest_release(r'https://api.github.com/repos/gohugoio/hugo/releases/latest')

    dirs=to_path+'/'+dic['tag_name']+'/'
    os.makedirs(dirs, exist_ok=True)

    for it in dic["assets"]:
        file_size=it["size"]
        url1=it["browser_download_url"]
        file_name = url1.split('/')[-1]
        if not os.path.exists(dirs+file_name):
            # Optional: point the URL at your own reverse proxy for more speed.
            #url1=url1.replace("https://github.com/", "https://xxx.xxxx.com/", 1)
            print (url1,"将要下载")
            cc=copy(url1,file_name,file_size,to_path,dic['tag_name']+'/',lock,i)
            cc.main()
            i+=1
        else:
            print(file_name,'已经存在')

    # Uploads run in daemon threads; wait for them before returning.
    if dic["assets"]:
        _wait_for_uploads()
  
# Run one check right away; this call is not guarded, so it also runs on import.
main()
if __name__ == '__main__':
    # Afterwards repeat the check on every day of the week at 12:00:00.
    daily_scheduler = BlockingScheduler()
    daily_scheduler.add_job(
        main,
        'cron',
        day_of_week='0-6',
        hour=12,
        minute=0,
        second=0,
    )
    try:
        daily_scheduler.start()
    except (KeyboardInterrupt, SystemExit):
        # Ctrl-C or interpreter shutdown ends the scheduler quietly.
        pass