Crawler should add page to index and document queue

pull/599/head
lor-engel 2021-10-15 16:29:37 -07:00 committed by GitHub
parent 6984b4e956
commit f6f1c3e5b1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 2 additions and 0 deletions

View File

@@ -159,6 +159,8 @@ class Crawler(object):
...
def crawl_page(self, page):
self.reverse_index_queue.add(page.url)
self.doc_index_queue.add(page.url)
for url in page.child_urls:
self.data_store.add_link_to_crawl(url)
page.signature = self.create_signature(page)