开发环境:
- Python2.7 + win10

文章插图
实现代码:from bs4 import BeautifulSoupimport lxmlimport Queueimport requestsimport re,os,sys,randomimport threadingimport loggingimport json,hashlib,urllibfrom requests.exceptions import ConnectTimeout,ConnectionError,ReadTimeout,SSLError,MissingSchema,ChunkedEncodingErrorimport random'''遇到不懂的问题?Python学习交流群:821460695满足你的需求,资料都已经上传群文件,可以自行下载!'''reload(sys)sys.setdefaultencoding('gbk')# 日志模块logger = logging.getLogger("AppName")formatter = logging.Formatter('%(asctime)s %(levelname)-5s: %(message)s')console_handler = logging.StreamHandler(sys.stdout)console_handler.formatter = formatterlogger.addHandler(console_handler)logger.setLevel(logging.INFO)q = Queue.Queue() # url队列page_q = Queue.Queue() # 页面def downlaod(q,x,path): urlhash = "https://weibomiaopai.com/" try: html = requests.get(urlhash).text except SSLError: logger.info(u"网络不稳定 正在重试") html = requests.get(urlhash).text reg = re.compile(r'var hash="(.*?)"', re.S) result = reg.findall(html) hash_v = result[0] while True: data = https://www.isolves.com/it/cxkf/yy/Python/2019-08-21/q.get() url, name = data[0], data[1].strip().replace("|", "") file = os.path.join(path, '%s' + ".mp4") % name api = "https://steakovercooked.com/api/video/?cached&hash=" + hash_v + "&video=" + url api2 = "https://helloacm.com/api/video/?cached&hash=" + hash_v + "&video=" + url try: res = requests.get(api) result = json.loads(res.text) except (ValueError,SSLError): try: res = requests.get(api2) result = json.loads(res.text) except (ValueError,SSLError): q.task_done() return False vurl = result['url'] logger.info(u"正在下载:%s" %name) try: r = requests.get(vurl) except SSLError: r = requests.get(vurl) except MissingSchema: q.task_done() continue try: with open(file,'wb') as f: f.write(r.content) except IOError: name = u'好开心么么哒 %s' % random.randint(1,9999) file = os.path.join(path, '%s' + ".mp4") % name with open(file,'wb') as f: f.write(r.content) logger.info(u"下载完成:%s" %name) q.task_done()def get_page(keyword,page_q): while True: headers = { 
'user-agent': 'Mozilla/5.0 (windows NT 10.0; Win64; x64; rv:57.0) Gecko/20100101 Firefox/57.0' } page = page_q.get() url = "https://www.youtube.com/results?sp=EgIIAg%253D%253D&search_query=" + keyword + "&page=" + str(page) try: html = requests.get(url, headers=headers).text except (ConnectTimeout,ConnectionError): print u"不能访问youtube 检查是否已FQ" os._exit(0) reg = re.compile(r'"url":"/watch?v=(.*?)","webPageType"', re.S) result = reg.findall(html) logger.info(u"第 %s 页" % page) for x in result: vurl = "https://www.youtube.com/watch?v=" + x try: res = requests.get(vurl).text except (ConnectionError,ChunkedEncodingError): logger.info(u"网络不稳定 正在重试") try: res = requests.get(vurl).text except SSLError: continue reg2 = re.compile(r"
【python多线程爬取youtube视频,外面的世界很精彩】
推荐阅读
- JavaScript命名空间常用方法
- python爬取拉勾网数据并进行数据可视化
- 购买哪种绿茶比较好
- Python匿名函数的介绍及用途
- Python教程:使用Turtles画出带有花瓣的花
- 世界上最贵的酒店一晚多少钱 世界上最贵的酒店住一晚上40万元
- 买壶时 你是不是人傻钱多的其中之一
- 多久去一次角质最好?去角质的最佳时间
- 开眼角多久恢复,注意养护
- 美瞳线一般多久掉痂恢复 美瞳线多久掉痂完毕
