'''Wallhaven wallpaper crawler.

Asks the user for a listing (latest / hot / toplist / random / most-favorited),
an optional start page, an end page and a download directory, then walks every
listing page, follows each preview link and saves the full-size wallpaper.
'''
import os
import time

import bs4
import requests

# User-facing menu text (printed verbatim).
q = ''' 前言 :输入数字选对应网址
1 : 最新的(https://wallhaven.cc/latest)
2 : 最火的(https://wallhaven.cc/hot)
3 : 排行榜(https://wallhaven.cc/toplist)
4 : 随机的(https://wallhaven.cc/random)
5 : 收藏总数榜(https://wallhaven.cc/search?categories=000&purity=000&topRange=3d&sorting=favorites&order=desc%3D&)
提示:有默认选项的可直接回车
'''

# Banner shown once all user input has been collected.
# NOTE(review): the art's original line breaks were lost in this file copy;
# this is a best-effort reconstruction of the same character sequence.
live = '''
***************************************************
    く__,.ヘヽ.        /  ,ー、 〉
     \ ', !-─‐-i  /  /´
      /`ー'       L//`ヽ、
     /   /,   /|   ,   ,       ',
   イ   / /-‐/  i  L_ ハ ヽ!   i
    レ ヘ 7イ`ト   レ'ァ-ト、!ハ|   |
      !,/7 '0'     ´0iソ|    |
      |.从"    _     ,,,, / |./    |
      レ'| i>.、,,__  _,.イ /   .i   |
        レ'| | / k_7_/レ'ヽ,  ハ.  |
          | |/i 〈|/   i  ,.ヘ |  i  |
         .|/ /  i:    ヘ!    \  |
          kヽ>、ハ    _,.ヘ、    /、!
          !'〈//`T´', \ `'7'ーr'
          レ'ヽL__|___i,___,ンレ|ノ
              ト-,/  |___./
              'ー'    !_,.:
********** 资料收集完毕,程序开始执行 *************
'''

# Banner shown when the crawl is finished.
done = '''
****************************
*       程序执行完毕!      *
*                          *
*     Created by Jason     *
****************************
'''

# Shared request headers: wallhaven rejects the default python user agent.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:78.0) Gecko/20100101 Firefox/78.0'}

# Menu digit -> listing URL; must agree with the menu text in ``q``.
URL_CHOICES = {
    '1': 'https://wallhaven.cc/latest',
    '2': 'https://wallhaven.cc/hot',
    '3': 'https://wallhaven.cc/toplist',
    '4': 'https://wallhaven.cc/random',
    '5': 'https://wallhaven.cc/search?categories=000&purity=000&topRange=3d&sorting=favorites&order=desc&',
}


def choose(j):
    """Set the global listing URL ``m`` from menu input *j*.

    Exits with an error message for an unrecognized digit; the original
    silently left ``m`` undefined and crashed later with a NameError.
    """
    global m
    if j not in URL_CHOICES:
        print('\n请输入正确的数字\n')
        raise SystemExit(0)
    m = URL_CHOICES[j]


j = input(q + '请按指示输入数字:')
if not j:
    print('\n请输入正确的数字\n')
    raise SystemExit(0)
choose(j)

k = input('\n[$][选填]你希望从第几页开始爬取(默认为第一页):')
l = input('\n[$]你希望爬到第几页结束:')
if not l:
    print('\n请输入正确的数字\n')
    raise SystemExit(0)
save_dir = input('\n[$]请输入要保存的绝对路径(默认为桌面):')

# Change into the requested directory; fall back to the desktop when the
# input is empty or not a usable path.
try:
    os.chdir(save_dir)
except OSError:
    os.chdir('C:\\Users\\{}\\desktop'.format(os.getlogin()))
download_path = os.path.join(os.getcwd(), 'wallpaper\\')
os.makedirs(download_path, exist_ok=True)

print('\n[+]当前下载目录为{}'.format(download_path))
print(live)


def _save_image(src):
    """Download one full-size image URL into ``download_path``.

    Skips files that already exist.  Replaces three copy-pasted branches in
    the original (keyed on ``p[-3]``) whose bodies were identical apart from
    their handlers — one of which printed the Response object instead of the
    failing URL.
    """
    filename = src.split('/')[-1]
    target = download_path + filename
    if os.path.isfile(target):
        print('\r[-]图片已存在,跳过\n')
        return
    try:
        image = requests.get(src, stream=True, headers=headers)
        with open(target, 'wb') as out:
            for chunk in image.iter_content(10000):
                out.write(chunk)
        print("\r[*]正在爬取图片{}".format(filename), end="")
        print("\r[+]爬取成功,图片名:{}\n\n".format(filename), end="")
    except (requests.RequestException, OSError):
        # Narrow handler: the bare ``except:`` of the original also swallowed
        # KeyboardInterrupt, making the crawl impossible to cancel cleanly.
        print('\r[-]{}爬取失败'.format(src))


def download(newUrl):
    """Crawl one listing page *newUrl*.

    Follows every ``a.preview`` link on the listing, locates the
    ``img#wallpaper`` element on each detail page and saves the image.
    """
    listing = requests.get(newUrl, headers=headers, stream=True)
    listing_soup = bs4.BeautifulSoup(listing.text, 'html.parser')
    for preview in listing_soup.findAll('a', attrs={'class': 'preview'}):
        detail = requests.get(preview.get('href'), stream=True, headers=headers)
        detail_soup = bs4.BeautifulSoup(detail.text, 'html.parser')
        for img in detail_soup.findAll('img', attrs={"id": "wallpaper"}):
            src = img.get('src')
            print('\r[*]已找到图片链接{}'.format(src))
            _save_image(src)


'''从第几页下载,要下载几页'''
start_page = int(k) if k else 1
end_page = int(l)
# Choice 5 already carries a query string, so extra parameters must be
# appended with '&' — the original always used '?' and built a broken URL.
page_sep = '&page=' if '?' in m else '?page='
# Crawl pages start..end inclusive.  The original ``range(start, end)``
# skipped the final requested page, and its completion check
# ``int(k) + i == int(l)`` fired mid-loop (printing the banner twice).
for page in range(start_page, end_page + 1):
    download(m + page_sep + str(page))
    if page == end_page:
        print('\n' + done)
    else:
        print('\n[+]第{}页爬取完成,开始爬取第{}页\n'.format(page, page + 1))
        time.sleep(1)