Crawler should add the page to the reverse index queue and the document index queue

This commit is contained in:
lor-engel
2021-10-15 16:29:37 -07:00
committed by GitHub
parent 6984b4e956
commit f6f1c3e5b1

View File

@@ -159,6 +159,8 @@ class Crawler(object):
... ...
 def crawl_page(self, page):
+self.reverse_index_queue.add(page.url)
+self.doc_index_queue.add(page.url)
 for url in page.child_urls:
 self.data_store.add_link_to_crawl(url)
 page.signature = self.create_signature(page)