"""爬取4399小游戏的游戏链接"""import requestsfrom lxml import etreeurl = "http://www.4399.com/"#4399小游戏网址headers = { "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64)...
"""
Scrape game links from the 4399 mini-game site.
"""
from urllib.parse import urljoin

import requests
from lxml import etree
# Home page of the 4399 mini-game site. It is both the page that gets fetched
# and the base against which the extracted hrefs are resolved.
url = "http://www.4399.com/"
# Request headers: send a desktop-browser User-Agent with the request.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.107 Safari/537.36 Edg/92.0.902.62"
}
# XPath selecting the <ul> element(s) that hold the game links.
# NOTE(review): this expression is purely positional (div[10]/div[1]/div[1]),
# so any change to the page layout will make it match nothing - confirm it
# against the live page before relying on the output.
GAME_LIST_XPATH = '//*[@id="skinbody"]/div[10]/div[1]/div[1]/ul'


def build_game_urls(base_url, hrefs):
    """Resolve every href against *base_url* and return the absolute URLs.

    Plain string concatenation (``base_url + href``) yields a double slash
    for root-relative hrefs such as ``/flash/1.htm`` when the base ends in
    ``/``, and a malformed URL for hrefs that are already absolute.
    ``urljoin`` handles relative, root-relative and absolute hrefs.

    Args:
        base_url: URL of the page the hrefs were extracted from.
        hrefs: Iterable of href attribute values.

    Returns:
        A list of absolute URLs, in the same order as *hrefs*.
    """
    return [urljoin(base_url, href) for href in hrefs]


def fetch_game_urls(timeout=10):
    """Download the home page and return the game links found on it.

    Args:
        timeout: Seconds to wait for the server before giving up. Requests
            applies no timeout unless one is passed explicitly.

    Returns:
        A list of absolute game URLs; empty when the XPath matches nothing.

    Raises:
        requests.RequestException: On connection failure, timeout, or an
            HTTP error status.
    """
    response = requests.get(url=url, headers=headers, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of silently parsing an error page.
    response.raise_for_status()
    # Decode the body as GBK, as the original script did.
    response.encoding = "gbk"
    document = etree.HTML(response.text)

    hrefs = []
    for game_list in document.xpath(GAME_LIST_XPATH):
        # Collect the href of every <li><a> link below this list.
        hrefs.extend(game_list.xpath('.//li/a/@href'))
    return build_game_urls(url, hrefs)


def main():
    """Print one absolute game URL per line."""
    for game_url in fetch_game_urls():
        print(game_url)


if __name__ == "__main__":
    main()
新手代码，不好的地方请帮忙改正。
谢谢各位大佬了！