import
urllib
import
urllib.request
def loadPage(url,filename):
    """Fetch ``url`` and return the response body decoded as UTF-8 text.

    Args:
        url: Address to request.
        filename: Not used by this function; kept in the signature so
            existing callers that pass it keep working.

    Returns:
        The response body as a ``str``.

    Raises:
        urllib.error.URLError: Propagated from ``urlopen`` when the request
            fails.
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    request = urllib.request.Request(url)
    # Use the response as a context manager so it is closed as soon as the
    # body has been read, instead of staying open until garbage collection.
    with urllib.request.urlopen(request) as response:
        body = response.read()
    return body.decode('utf-8')
def writePage(html,filename):
    """Write ``html`` to ``filename`` as UTF-8 text, then print a separator.

    Args:
        html: Text to store.
        filename: Path of the file to create or overwrite.

    Raises:
        OSError: Propagated from ``open`` if the file cannot be written.
    """
    # Pin the encoding: the platform default (for example a legacy code page
    # on Windows) may be unable to encode text that was decoded from UTF-8.
    with open(filename, 'w', encoding='utf-8') as f:
        f.write(html)
    print('-' * 30)
def tiebaSpider(url,beginPage,endPage):
    """Download pages ``beginPage`` through ``endPage`` (inclusive) and save each.

    For every page number the function appends a ``&pn=`` offset to ``url``,
    prints the resulting address, fetches it with ``loadPage`` and stores the
    text with ``writePage`` in a file named after the page number.

    Args:
        url: Base address that already carries the query string; the
            ``&pn=`` parameter is appended to it.
        beginPage: First page number to fetch.
        endPage: Last page number to fetch (inclusive).
    """
    for page in range(beginPage, endPage + 1):
        # The offset grows by 50 per page: page 1 -> 0, page 2 -> 50, ...
        pn = (page - 1) * 50
        fullurl = url + "&pn=" + str(pn)
        print(fullurl)
        filename = '第' + str(page) + '页.html'
        # Fetch the page-specific address. Requesting the bare ``url`` would
        # download the same content for every page number.
        html = loadPage(fullurl, filename)
        writePage(html, filename)
if __name__ == "__main__":
    # Import explicitly rather than relying on ``urllib.request`` having
    # loaded ``urllib.parse`` as a side effect of its own imports.
    import urllib.parse

    # Ask for the forum name and the inclusive page range to download.
    kw = input('请输入你要需要爬取的贴吧名:')
    beginPage = int(input('请输入起始页'))
    endPage = int(input('请输入结束页'))

    # urlencode percent-escapes the forum name so it is safe in a query string.
    url = 'https://tieba.baidu.com/f?'
    kw1 = {'kw': kw}
    key = urllib.parse.urlencode(kw1)
    fullurl = url + key

    tiebaSpider(fullurl, beginPage, endPage)