Crawler should add page to index and document queue
parent
6984b4e956
commit
f6f1c3e5b1
|
@@ -159,6 +159,8 @@ class Crawler(object):
         ...
 
     def crawl_page(self, page):
+        self.reverse_index_queue.add(page.url)
+        self.doc_index_queue.add(page.url)
         for url in page.child_urls:
             self.data_store.add_link_to_crawl(url)
         page.signature = self.create_signature(page)
Loading…
Reference in New Issue