Crawler should add page to index and document queue
parent
6984b4e956
commit
f6f1c3e5b1
|
@ -159,6 +159,8 @@ class Crawler(object):
|
|||
...
|
||||
|
||||
def crawl_page(self, page):
|
||||
self.reverse_index_queue.add(page.url)
|
||||
self.doc_index_queue.add(page.url)
|
||||
for url in page.child_urls:
|
||||
self.data_store.add_link_to_crawl(url)
|
||||
page.signature = self.create_signature(page)
|
||||
|
|
Loading…
Reference in New Issue