import re ... @config(age=10 * 24 * 60 * 60) def index_page(self, response): for each in response.doc('a[href^="http"]').items(): if re.match("http://movie.douban.com/tag/\w+", each.attr.href, re.U): self.crawl(each.attr.href, callback=self.list_page)