1、最简单请求
#!/usr/local/bin/python3
# File: spider.py (opened in the original shell session with `vim spider.py`).
"""Simplest request: fetch a page with urlopen() and print the raw body."""
import urllib.request

url = "http://www.sdlj.ltd/"

# The context manager closes the HTTP response (and its socket) as soon as
# the body has been read, instead of leaving it open until interpreter exit.
with urllib.request.urlopen(url) as response:
    html = response.read()

# read() returns bytes, so this prints the b'...' representation.
print(html)
2、使用Request
#!/usr/local/bin/python3
"""Fetch a page by handing an explicit Request object to urlopen()."""
import urllib.request

url = "http://www.sdlj.ltd/"

# A Request built without a data argument is sent as GET.
request = urllib.request.Request(url)

# Close the response deterministically once the body has been read.
with urllib.request.urlopen(request) as response:
    html = response.read()

# read() returns bytes, so this prints the b'...' representation.
print(html)
3、使用get发送数据
#!/usr/local/bin/python3
"""Send data with GET by appending URL-encoded parameters to the URL."""
import urllib.parse
import urllib.request


def append_query(url, query):
    """Return ``url`` with the already-encoded ``query`` string appended.

    Args:
        url: Target URL, with or without an existing query string.
        query: Parameters as produced by ``urllib.parse.urlencode``.

    Returns:
        The URL joined to ``query`` with the correct separator: ``?`` when
        the URL has no query yet, ``&`` when it already carries parameters,
        and nothing when it already ends in ``?`` or ``&``. An empty
        ``query`` leaves the URL unchanged.
    """
    if not query:
        return url
    if "?" not in url:
        return url + "?" + query
    if url.endswith(("?", "&")):
        # The separator is already in place; adding another would create an
        # empty parameter.
        return url + query
    # Existing parameters must be separated from the new ones, otherwise
    # "a=1" + "id=23" collapses into the single bogus pair "a=1id=23".
    return url + "&" + query


def main():
    """Request the article page with ``id=23`` in the query string."""
    url = "http://www.sdlj.ltd/article"
    values = {"id": "23"}
    data = urllib.parse.urlencode(values)
    url = append_query(url, data)

    request = urllib.request.Request(url)
    # Close the response deterministically once the body has been read.
    with urllib.request.urlopen(request) as response:
        html = response.read()

    # read() returns bytes, so this prints the b'...' representation.
    print(html)


if __name__ == "__main__":
    main()
4、使用post发送数据
#!/usr/local/bin/python3
"""Send form data with POST by giving the Request a data payload."""
import urllib.parse
import urllib.request

url = "http://www.sdlj.ltd/article"
values = {"id": "23"}

# The request body must be bytes, so the URL-encoded form string is encoded
# before it is attached to the request.
data = urllib.parse.urlencode(values).encode(encoding="utf-8")

# Supplying data makes urllib send the request as POST instead of GET.
request = urllib.request.Request(url, data)

# Close the response deterministically once the body has been read.
with urllib.request.urlopen(request) as response:
    html = response.read()

# read() returns bytes, so this prints the b'...' representation.
print(html)
5、发送数据和header
#!/usr/local/bin/python3
"""POST form data together with custom request headers and print the page text."""
import gzip
import urllib.parse
import urllib.request
import zlib


def decode_body(raw, content_encoding):
    """Undo the HTTP ``Content-Encoding`` applied to a response body.

    Args:
        raw: Body bytes exactly as read from the response.
        content_encoding: Value of the ``Content-Encoding`` response header,
            or ``None`` when the header is absent.

    Returns:
        The decompressed bytes for ``gzip``/``x-gzip`` and ``deflate``;
        ``raw`` unchanged for a missing or any other encoding.
    """
    encoding = (content_encoding or "").strip().lower()
    if encoding in ("gzip", "x-gzip"):
        return gzip.decompress(raw)
    if encoding == "deflate":
        try:
            return zlib.decompress(raw)
        except zlib.error:
            # Some servers send a raw deflate stream without the zlib
            # header; a negative wbits value tells zlib to expect that.
            return zlib.decompress(raw, -zlib.MAX_WBITS)
    return raw


def main():
    """Send the form and headers, then print the decoded response text."""
    url = "http://www.sdlj.ltd/"
    values = {
        "u": "www.google.com",
    }
    headers = {
        "Cookie": "s=v9kao5hg5fdscvglnk6diu5ib0",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
        # urllib does not decompress responses on its own, so only encodings
        # that decode_body() can undo are advertised (no sdch / br).
        "Accept-Encoding": "gzip, deflate",
        "Accept-Language": "zh-CN,zh;q=0.8",
        "Connection": "keep-alive",
        # NOTE(review): this Host value does not match the hostname in url;
        # it looks copied from another capture -- confirm it is intended.
        "Host": "ty1.mm.4night.win",
        "Upgrade-Insecure-Requests": "1",
        "User-Agent": "Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36",
    }

    # A bytes payload makes urllib send the request as POST.
    data = urllib.parse.urlencode(values).encode()
    request = urllib.request.Request(url, data, headers)

    # Close the response deterministically once the body has been read.
    with urllib.request.urlopen(request) as response:
        raw = response.read()
        content_encoding = response.headers.get("Content-Encoding")

    # Decompress first; decoding compressed bytes as text would fail.
    html = decode_body(raw, content_encoding)
    print(html.decode())


if __name__ == "__main__":
    main()
6、超时
#!/usr/local/bin/python3
"""POST form data with a socket timeout so a stalled server cannot hang the script."""
import socket
import urllib.parse
import urllib.request

url = "http://www.sdlj.ltd/article"
values = {"id": "23"}

# The request body must be bytes.
data = urllib.parse.urlencode(values).encode(encoding="utf-8")

# Timeout in seconds.
timeout = 2  # 2 seconds
# This sets the process-wide default for every socket created afterwards,
# including the one urlopen() opens below. (urlopen() also accepts a
# per-call timeout= argument when a global default is not wanted.)
socket.setdefaulttimeout(timeout)

request = urllib.request.Request(url, data)

# Close the response deterministically once the body has been read.
with urllib.request.urlopen(request) as response:
    html = response.read()

# read() returns bytes, so this prints the b'...' representation.
print(html)
7、使用代理
#!/usr/local/bin/python3
"""Fetch a page through an HTTP proxy by installing a proxy-aware opener."""
import urllib.request

# Key: protocol to be proxied ("http"); value: proxy address and port.
proxys = urllib.request.ProxyHandler({'http': "www.sdlj.ltd:8080"})
opener = urllib.request.build_opener(proxys)

# Make this opener the global default so plain urlopen() calls go through
# the proxy.
urllib.request.install_opener(opener)

# Close the response deterministically once the body has been read.
with urllib.request.urlopen("http://www.sdlj.ltd") as response:
    html = response.read().decode("utf8")

print(html)