def detail_parse(self, response):
    """Yield one request per small category listed under a large category.

    Downloads the listing page of the large category identified by
    ``response.meta["ID1"]``, reads the links in the first row of the
    page's ``sort_box`` filter, and schedules ``self.third_parse`` for
    every small category found there.

    Keys carried in the ``meta`` of each yielded request:
        ID1: large category ID (copied from ``response.meta``)
        ID2: large category name (copied from ``response.meta``)
        ID3: small category ID (third dot-separated field of the link href)
        ID4: small category name (link text with spaces removed, cut at
             the first "(")

    Entries that lack link text or a parsable href are skipped.
    """
    import logging

    log = logging.getLogger(__name__)
    big_id = response.meta["ID1"]

    url = 'http://category.dangdang.com/pg1-cp01.{}.00.00.00.00.html'.format(big_id)
    # NOTE(review): this is a synchronous fetch with no timeout inside a
    # Scrapy callback; consider a timeout or a scrapy.Request — confirm
    # whether this page differs from the one already in `response`.
    category_small = requests.get(url)
    contents = etree.HTML(category_small.content.decode('gbk'))
    goodslist = contents.xpath('//*[@class="sort_box"]/ul/li[1]/div/span')

    for goods in goodslist:
        try:
            # The last matching text node / href is used, as list.pop() did.
            link_text = goods.xpath('a/text()')[-1]
            link_href = goods.xpath('a/@href')[-1]
            category_small_name = link_text.replace(" ", "").split('(')[0]
            category_small_id = link_href.split('.')[2]
            meta = {
                "ID1": big_id,
                "ID2": response.meta["ID2"],
                "ID3": category_small_id,
                "ID4": category_small_name,
            }
        except (IndexError, KeyError) as exc:
            # Best-effort: a span without a usable link (or a response
            # without "ID2") is skipped, but no longer silently.
            log.debug("Skipping small-category entry under %s: %r", big_id, exc)
            continue

        category_small_url = (
            "http://category.dangdang.com/pg1-cp01.{}.{}.00.00.00.html"
            .format(big_id, category_small_id)
        )
        yield scrapy.Request(
            url=category_small_url,
            callback=self.third_parse,
            meta=meta,
        )