1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
| #!/usr/bin/env python
# -*- coding: utf-8 -*-
import concurrent.futures
import os
from urllib.parse import urljoin
import requests
from lxml import etree
class Wallpaper:
    """Download every picture of one www.youwu.cc section into ``images/``.

    The crawl walks three levels of pages: the paginated section listing,
    every album linked from a listing page, and every page of an album,
    from which the picture URLs are read.
    """

    # Seconds to wait on any single HTTP request. requests applies no timeout
    # by default, so a stalled connection would otherwise block the calling
    # thread indefinitely.
    REQUEST_TIMEOUT = 30

    def __init__(self, address):
        """Set up a crawler for ``https://www.youwu.cc/<address>/``.

        Also creates the ``images`` output directory (relative to the current
        working directory) when it does not exist yet.
        """
        self.base_url = f"https://www.youwu.cc/{address}/"
        self.headers = {
            "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.162 Safari/537.36"
        }
        self.session = requests.Session()
        os.makedirs("images", exist_ok=True)

    def fetch(self):
        """Crawl the whole section and save each picture that is found.

        Raises:
            requests.RequestException: a page or picture request failed,
                timed out, or answered with an HTTP error status.
            IndexError, ValueError: a page has no pagination link at the
                expected location, or its text holds no page count.
        """
        listing_tree = self._get_tree(self.base_url)
        max_page = self._page_count(
            listing_tree, "//div[@class='wrap page clearfix']/a[1]/text()"
        )
        for page in range(1, max_page + 1):
            # The first listing page is the section root itself; later pages
            # live at index_<n>.html below it.
            page_url = self.base_url if page == 1 else f"{self.base_url}index_{page}.html"
            main_tree = self._get_tree(page_url)
            for sub_url in main_tree.xpath("//div[@class='photo']//a/@href"):
                self._fetch_album(urljoin(self.base_url, sub_url))

    def _get_tree(self, url):
        """Request ``url`` and return its body parsed as an lxml HTML tree."""
        response = self.session.get(
            url=url, headers=self.headers, timeout=self.REQUEST_TIMEOUT
        )
        # Fail here on 4xx/5xx instead of parsing an error page and hitting a
        # confusing IndexError further down.
        response.raise_for_status()
        return etree.HTML(response.content)

    @staticmethod
    def _page_count(tree, pager_xpath):
        """Return the page count read from the first node matching ``pager_xpath``.

        The first two characters of the matched text are skipped and the rest
        is parsed as an integer.
        """
        return int(tree.xpath(pager_xpath)[0][2:])

    def _fetch_album(self, album_url):
        """Visit every page of the album at ``album_url`` and save its pictures."""
        album_tree = self._get_tree(album_url)
        max_image = self._page_count(album_tree, "//div[@class='page']//a[1]/text()")
        for image in range(1, max_image + 1):
            if image == 1:
                # The album page was just downloaded; reuse it rather than
                # requesting the same URL a second time.
                image_url, image_tree = album_url, album_tree
            else:
                # Drops the last five characters before appending "_<n>.html";
                # this assumes album URLs end in ".html".
                image_url = f"{album_url[:-5]}_{image}.html"
                image_tree = self._get_tree(image_url)
            for picture_url in image_tree.xpath("//div[@class='photo']/a/img/@src"):
                # Resolve relative and protocol-relative src values against
                # the page they came from; absolute URLs pass through.
                self._save_picture(urljoin(image_url, picture_url))

    def _save_picture(self, picture_url):
        """Download ``picture_url`` and store it in ``images/`` under its basename."""
        filename = os.path.basename(picture_url)
        filepath = os.path.join("images", filename)
        # Download before opening the file so that a failed request does not
        # leave an empty file behind.
        response = self.session.get(
            picture_url, headers=self.headers, timeout=self.REQUEST_TIMEOUT
        )
        response.raise_for_status()
        with open(filepath, "wb") as f:
            f.write(response.content)
        print(f"{filename} ... OK!")
if __name__ == "__main__":
    # Site sections to download; each one is crawled on its own pool thread.
    names = ["mygirl", "xiuren", "xiaoyu", "imiss"]
    with concurrent.futures.ThreadPoolExecutor() as executor:
        futures = []
        for section in names:
            crawler = Wallpaper(section)
            futures.append(executor.submit(crawler.fetch))
        # Report failures as the crawls finish, in completion order, so one
        # broken section does not hide the outcome of the others.
        for finished in concurrent.futures.as_completed(futures):
            try:
                finished.result()
            except Exception as e:
                print(f"Exception occurred: {e}")
|