Enable syntax highlighting in all python code snippets (#268)
parent
8b04d4d5fe
commit
116634f5b3
|
@ -1166,7 +1166,7 @@ Redisはさらに以下のような機能を備えています:
|
||||||
* エントリをキャッシュに追加します
|
* エントリをキャッシュに追加します
|
||||||
* エントリを返します
|
* エントリを返します
|
||||||
|
|
||||||
```
|
```python
|
||||||
def get_user(self, user_id):
|
def get_user(self, user_id):
|
||||||
user = cache.get("user.{0}", user_id)
|
user = cache.get("user.{0}", user_id)
|
||||||
if user is None:
|
if user is None:
|
||||||
|
@ -1209,7 +1209,7 @@ set_user(12345, {"foo":"bar"})
|
||||||
|
|
||||||
キャッシュコード:
|
キャッシュコード:
|
||||||
|
|
||||||
```
|
```python
|
||||||
def set_user(user_id, values):
|
def set_user(user_id, values):
|
||||||
user = db.query("UPDATE Users WHERE id = {0}", user_id, values)
|
user = db.query("UPDATE Users WHERE id = {0}", user_id, values)
|
||||||
cache.set(user_id, user)
|
cache.set(user_id, user)
|
||||||
|
|
|
@ -1180,7 +1180,7 @@ Redis 有下列附加功能:
|
||||||
- 将查找到的结果存储到缓存中
|
- 将查找到的结果存储到缓存中
|
||||||
- 返回所需内容
|
- 返回所需内容
|
||||||
|
|
||||||
```
|
```python
|
||||||
def get_user(self, user_id):
|
def get_user(self, user_id):
|
||||||
user = cache.get("user.{0}", user_id)
|
user = cache.get("user.{0}", user_id)
|
||||||
if user is None:
|
if user is None:
|
||||||
|
@ -1223,7 +1223,7 @@ set_user(12345, {"foo":"bar"})
|
||||||
|
|
||||||
缓存代码:
|
缓存代码:
|
||||||
|
|
||||||
```
|
```python
|
||||||
def set_user(user_id, values):
|
def set_user(user_id, values):
|
||||||
user = db.query("UPDATE Users WHERE id = {0}", user_id, values)
|
user = db.query("UPDATE Users WHERE id = {0}", user_id, values)
|
||||||
cache.set(user_id, user)
|
cache.set(user_id, user)
|
||||||
|
|
|
@ -1167,7 +1167,7 @@ Redis 還有以下額外的功能:
|
||||||
* 將該筆記錄儲存到快取
|
* 將該筆記錄儲存到快取
|
||||||
* 將資料返回
|
* 將資料返回
|
||||||
|
|
||||||
```
|
```python
|
||||||
def get_user(self, user_id):
|
def get_user(self, user_id):
|
||||||
user = cache.get("user.{0}", user_id)
|
user = cache.get("user.{0}", user_id)
|
||||||
if user is None:
|
if user is None:
|
||||||
|
@ -1210,7 +1210,7 @@ set_user(12345, {"foo":"bar"})
|
||||||
|
|
||||||
快取程式碼:
|
快取程式碼:
|
||||||
|
|
||||||
```
|
```python
|
||||||
def set_user(user_id, values):
|
def set_user(user_id, values):
|
||||||
user = db.query("UPDATE Users WHERE id = {0}", user_id, values)
|
user = db.query("UPDATE Users WHERE id = {0}", user_id, values)
|
||||||
cache.set(user_id, user)
|
cache.set(user_id, user)
|
||||||
|
|
|
@ -1164,7 +1164,7 @@ The application is responsible for reading and writing from storage. The cache
|
||||||
* Add entry to cache
|
* Add entry to cache
|
||||||
* Return entry
|
* Return entry
|
||||||
|
|
||||||
```
|
```python
|
||||||
def get_user(self, user_id):
|
def get_user(self, user_id):
|
||||||
user = cache.get("user.{0}", user_id)
|
user = cache.get("user.{0}", user_id)
|
||||||
if user is None:
|
if user is None:
|
||||||
|
@ -1201,13 +1201,13 @@ The application uses the cache as the main data store, reading and writing data
|
||||||
|
|
||||||
Application code:
|
Application code:
|
||||||
|
|
||||||
```
|
```python
|
||||||
set_user(12345, {"foo":"bar"})
|
set_user(12345, {"foo":"bar"})
|
||||||
```
|
```
|
||||||
|
|
||||||
Cache code:
|
Cache code:
|
||||||
|
|
||||||
```
|
```python
|
||||||
def set_user(user_id, values):
|
def set_user(user_id, values):
|
||||||
user = db.query("UPDATE Users WHERE id = {0}", user_id, values)
|
user = db.query("UPDATE Users WHERE id = {0}", user_id, values)
|
||||||
cache.set(user_id, user)
|
cache.set(user_id, user)
|
||||||
|
|
|
@ -182,7 +182,7 @@ For the **Category Service**, we can seed a seller-to-category dictionary with t
|
||||||
|
|
||||||
**Clarify with your interviewer how much code you are expected to write**.
|
**Clarify with your interviewer how much code you are expected to write**.
|
||||||
|
|
||||||
```
|
```python
|
||||||
class DefaultCategories(Enum):
|
class DefaultCategories(Enum):
|
||||||
|
|
||||||
HOUSING = 0
|
HOUSING = 0
|
||||||
|
@ -199,7 +199,7 @@ seller_category_map['Target'] = DefaultCategories.SHOPPING
|
||||||
|
|
||||||
For sellers not initially seeded in the map, we could use a crowdsourcing effort by evaluating the manual category overrides our users provide. We could use a heap to quickly look up the top manual override per seller in O(1) time.
|
For sellers not initially seeded in the map, we could use a crowdsourcing effort by evaluating the manual category overrides our users provide. We could use a heap to quickly look up the top manual override per seller in O(1) time.
|
||||||
|
|
||||||
```
|
```python
|
||||||
class Categorizer(object):
|
class Categorizer(object):
|
||||||
|
|
||||||
def __init__(self, seller_category_map, self.seller_category_crowd_overrides_map):
|
def __init__(self, seller_category_map, self.seller_category_crowd_overrides_map):
|
||||||
|
@ -219,7 +219,7 @@ class Categorizer(object):
|
||||||
|
|
||||||
Transaction implementation:
|
Transaction implementation:
|
||||||
|
|
||||||
```
|
```python
|
||||||
class Transaction(object):
|
class Transaction(object):
|
||||||
|
|
||||||
def __init__(self, created_at, seller, amount):
|
def __init__(self, created_at, seller, amount):
|
||||||
|
@ -232,7 +232,7 @@ class Transaction(object):
|
||||||
|
|
||||||
To start, we could use a generic budget template that allocates category amounts based on income tiers. Using this approach, we would not have to store the 100 million budget items identified in the constraints, only those that the user overrides. If a user overrides a budget category, we could store the override in the `TABLE budget_overrides`.
|
To start, we could use a generic budget template that allocates category amounts based on income tiers. Using this approach, we would not have to store the 100 million budget items identified in the constraints, only those that the user overrides. If a user overrides a budget category, we could store the override in the `TABLE budget_overrides`.
|
||||||
|
|
||||||
```
|
```python
|
||||||
class Budget(object):
|
class Budget(object):
|
||||||
|
|
||||||
def __init__(self, income):
|
def __init__(self, income):
|
||||||
|
@ -273,7 +273,7 @@ user_id timestamp seller amount
|
||||||
|
|
||||||
**MapReduce** implementation:
|
**MapReduce** implementation:
|
||||||
|
|
||||||
```
|
```python
|
||||||
class SpendingByCategory(MRJob):
|
class SpendingByCategory(MRJob):
|
||||||
|
|
||||||
def __init__(self, categorizer):
|
def __init__(self, categorizer):
|
||||||
|
|
|
@ -130,7 +130,7 @@ To generate the unique url, we could:
|
||||||
* Base 64 is another popular encoding but causes issues for URLs because of the additional `+` and `/` characters
|
* Base 64 is another popular encoding but causes issues for URLs because of the additional `+` and `/` characters
|
||||||
* The following [Base 62 pseudocode](http://stackoverflow.com/questions/742013/how-to-code-a-url-shortener) runs in O(k) time where k is the number of digits = 7:
|
* The following [Base 62 pseudocode](http://stackoverflow.com/questions/742013/how-to-code-a-url-shortener) runs in O(k) time where k is the number of digits = 7:
|
||||||
|
|
||||||
```
|
```python
|
||||||
def base_encode(num, base=62):
|
def base_encode(num, base=62):
|
||||||
digits = []
|
digits = []
|
||||||
while num > 0
|
while num > 0
|
||||||
|
@ -142,7 +142,7 @@ def base_encode(num, base=62):
|
||||||
|
|
||||||
* Take the first 7 characters of the output, which results in 62^7 possible values and should be sufficient to handle our constraint of 360 million shortlinks in 3 years:
|
* Take the first 7 characters of the output, which results in 62^7 possible values and should be sufficient to handle our constraint of 360 million shortlinks in 3 years:
|
||||||
|
|
||||||
```
|
```python
|
||||||
url = base_encode(md5(ip_address+timestamp))[:URL_LENGTH]
|
url = base_encode(md5(ip_address+timestamp))[:URL_LENGTH]
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
|
@ -97,7 +97,7 @@ The cache can use a doubly-linked list: new items will be added to the head whil
|
||||||
|
|
||||||
**Query API Server** implementation:
|
**Query API Server** implementation:
|
||||||
|
|
||||||
```
|
```python
|
||||||
class QueryApi(object):
|
class QueryApi(object):
|
||||||
|
|
||||||
def __init__(self, memory_cache, reverse_index_service):
|
def __init__(self, memory_cache, reverse_index_service):
|
||||||
|
@ -121,7 +121,7 @@ class QueryApi(object):
|
||||||
|
|
||||||
**Node** implementation:
|
**Node** implementation:
|
||||||
|
|
||||||
```
|
```python
|
||||||
class Node(object):
|
class Node(object):
|
||||||
|
|
||||||
def __init__(self, query, results):
|
def __init__(self, query, results):
|
||||||
|
@ -131,7 +131,7 @@ class Node(object):
|
||||||
|
|
||||||
**LinkedList** implementation:
|
**LinkedList** implementation:
|
||||||
|
|
||||||
```
|
```python
|
||||||
class LinkedList(object):
|
class LinkedList(object):
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
|
@ -150,7 +150,7 @@ class LinkedList(object):
|
||||||
|
|
||||||
**Cache** implementation:
|
**Cache** implementation:
|
||||||
|
|
||||||
```
|
```python
|
||||||
class Cache(object):
|
class Cache(object):
|
||||||
|
|
||||||
def __init__(self, MAX_SIZE):
|
def __init__(self, MAX_SIZE):
|
||||||
|
|
|
@ -102,7 +102,7 @@ We'll use a multi-step **MapReduce**:
|
||||||
* **Step 1** - Transform the data to `(category, product_id), sum(quantity)`
|
* **Step 1** - Transform the data to `(category, product_id), sum(quantity)`
|
||||||
* **Step 2** - Perform a distributed sort
|
* **Step 2** - Perform a distributed sort
|
||||||
|
|
||||||
```
|
```python
|
||||||
class SalesRanker(MRJob):
|
class SalesRanker(MRJob):
|
||||||
|
|
||||||
def within_past_week(self, timestamp):
|
def within_past_week(self, timestamp):
|
||||||
|
|
|
@ -62,7 +62,7 @@ Handy conversion guide:
|
||||||
|
|
||||||
Without the constraint of millions of users (vertices) and billions of friend relationships (edges), we could solve this unweighted shortest path task with a general BFS approach:
|
Without the constraint of millions of users (vertices) and billions of friend relationships (edges), we could solve this unweighted shortest path task with a general BFS approach:
|
||||||
|
|
||||||
```
|
```python
|
||||||
class Graph(Graph):
|
class Graph(Graph):
|
||||||
|
|
||||||
def shortest_path(self, source, dest):
|
def shortest_path(self, source, dest):
|
||||||
|
@ -117,7 +117,7 @@ We won't be able to fit all users on the same machine, we'll need to [shard](htt
|
||||||
|
|
||||||
**Lookup Service** implementation:
|
**Lookup Service** implementation:
|
||||||
|
|
||||||
```
|
```python
|
||||||
class LookupService(object):
|
class LookupService(object):
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
|
@ -132,7 +132,7 @@ class LookupService(object):
|
||||||
|
|
||||||
**Person Server** implementation:
|
**Person Server** implementation:
|
||||||
|
|
||||||
```
|
```python
|
||||||
class PersonServer(object):
|
class PersonServer(object):
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
|
@ -151,7 +151,7 @@ class PersonServer(object):
|
||||||
|
|
||||||
**Person** implementation:
|
**Person** implementation:
|
||||||
|
|
||||||
```
|
```python
|
||||||
class Person(object):
|
class Person(object):
|
||||||
|
|
||||||
def __init__(self, id, name, friend_ids):
|
def __init__(self, id, name, friend_ids):
|
||||||
|
@ -162,7 +162,7 @@ class Person(object):
|
||||||
|
|
||||||
**User Graph Service** implementation:
|
**User Graph Service** implementation:
|
||||||
|
|
||||||
```
|
```python
|
||||||
class UserGraphService(object):
|
class UserGraphService(object):
|
||||||
|
|
||||||
def __init__(self, lookup_service):
|
def __init__(self, lookup_service):
|
||||||
|
|
|
@ -100,7 +100,7 @@ We could store `links_to_crawl` and `crawled_links` in a key-value **NoSQL Datab
|
||||||
|
|
||||||
`PagesDataStore` is an abstraction within the **Crawler Service** that uses the **NoSQL Database**:
|
`PagesDataStore` is an abstraction within the **Crawler Service** that uses the **NoSQL Database**:
|
||||||
|
|
||||||
```
|
```python
|
||||||
class PagesDataStore(object):
|
class PagesDataStore(object):
|
||||||
|
|
||||||
def __init__(self, db);
|
def __init__(self, db);
|
||||||
|
@ -134,7 +134,7 @@ class PagesDataStore(object):
|
||||||
|
|
||||||
`Page` is an abstraction within the **Crawler Service** that encapsulates a page, its contents, child urls, and signature:
|
`Page` is an abstraction within the **Crawler Service** that encapsulates a page, its contents, child urls, and signature:
|
||||||
|
|
||||||
```
|
```python
|
||||||
class Page(object):
|
class Page(object):
|
||||||
|
|
||||||
def __init__(self, url, contents, child_urls, signature):
|
def __init__(self, url, contents, child_urls, signature):
|
||||||
|
@ -146,7 +146,7 @@ class Page(object):
|
||||||
|
|
||||||
`Crawler` is the main class within **Crawler Service**, composed of `Page` and `PagesDataStore`.
|
`Crawler` is the main class within **Crawler Service**, composed of `Page` and `PagesDataStore`.
|
||||||
|
|
||||||
```
|
```python
|
||||||
class Crawler(object):
|
class Crawler(object):
|
||||||
|
|
||||||
def __init__(self, data_store, reverse_index_queue, doc_index_queue):
|
def __init__(self, data_store, reverse_index_queue, doc_index_queue):
|
||||||
|
@ -187,7 +187,7 @@ We'll want to remove duplicate urls:
|
||||||
* For smaller lists we could use something like `sort | uniq`
|
* For smaller lists we could use something like `sort | uniq`
|
||||||
* With 1 billion links to crawl, we could use **MapReduce** to output only entries that have a frequency of 1
|
* With 1 billion links to crawl, we could use **MapReduce** to output only entries that have a frequency of 1
|
||||||
|
|
||||||
```
|
```python
|
||||||
class RemoveDuplicateUrls(MRJob):
|
class RemoveDuplicateUrls(MRJob):
|
||||||
|
|
||||||
def mapper(self, _, line):
|
def mapper(self, _, line):
|
||||||
|
|
Loading…
Reference in New Issue