From f6f1c3e5b14a99abfd55211476e32342a6a2329d Mon Sep 17 00:00:00 2001 From: lor-engel <83264925+lor-engel@users.noreply.github.com> Date: Fri, 15 Oct 2021 16:29:37 -0700 Subject: [PATCH] Crawler should add page to index and document queue --- solutions/system_design/web_crawler/README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/solutions/system_design/web_crawler/README.md b/solutions/system_design/web_crawler/README.md index e6e79ad2..0c481416 100644 --- a/solutions/system_design/web_crawler/README.md +++ b/solutions/system_design/web_crawler/README.md @@ -159,6 +159,8 @@ class Crawler(object): ... def crawl_page(self, page): + self.reverse_index_queue.add(page.url) + self.doc_index_queue.add(page.url) for url in page.child_urls: self.data_store.add_link_to_crawl(url) page.signature = self.create_signature(page)