Enable syntax highlighting in all python code snippets (#268)
parent
8b04d4d5fe
commit
116634f5b3
|
@ -1166,7 +1166,7 @@ Redisはさらに以下のような機能を備えています:
|
|||
* エントリをキャッシュに追加します
|
||||
* エントリを返します
|
||||
|
||||
```
|
||||
```python
|
||||
def get_user(self, user_id):
|
||||
user = cache.get("user.{0}", user_id)
|
||||
if user is None:
|
||||
|
@ -1209,7 +1209,7 @@ set_user(12345, {"foo":"bar"})
|
|||
|
||||
キャッシュコード:
|
||||
|
||||
```
|
||||
```python
|
||||
def set_user(user_id, values):
|
||||
user = db.query("UPDATE Users WHERE id = {0}", user_id, values)
|
||||
cache.set(user_id, user)
|
||||
|
|
|
@ -1180,7 +1180,7 @@ Redis 有下列附加功能:
|
|||
- 将查找到的结果存储到缓存中
|
||||
- 返回所需内容
|
||||
|
||||
```
|
||||
```python
|
||||
def get_user(self, user_id):
|
||||
user = cache.get("user.{0}", user_id)
|
||||
if user is None:
|
||||
|
@ -1223,7 +1223,7 @@ set_user(12345, {"foo":"bar"})
|
|||
|
||||
缓存代码:
|
||||
|
||||
```
|
||||
```python
|
||||
def set_user(user_id, values):
|
||||
user = db.query("UPDATE Users WHERE id = {0}", user_id, values)
|
||||
cache.set(user_id, user)
|
||||
|
|
|
@ -1167,7 +1167,7 @@ Redis 還有以下額外的功能:
|
|||
* 將該筆記錄儲存到快取
|
||||
* 將資料返回
|
||||
|
||||
```
|
||||
```python
|
||||
def get_user(self, user_id):
|
||||
user = cache.get("user.{0}", user_id)
|
||||
if user is None:
|
||||
|
@ -1210,7 +1210,7 @@ set_user(12345, {"foo":"bar"})
|
|||
|
||||
快取程式碼:
|
||||
|
||||
```
|
||||
```python
|
||||
def set_user(user_id, values):
|
||||
user = db.query("UPDATE Users WHERE id = {0}", user_id, values)
|
||||
cache.set(user_id, user)
|
||||
|
|
|
@ -1164,7 +1164,7 @@ The application is responsible for reading and writing from storage. The cache
|
|||
* Add entry to cache
|
||||
* Return entry
|
||||
|
||||
```
|
||||
```python
|
||||
def get_user(self, user_id):
|
||||
user = cache.get("user.{0}", user_id)
|
||||
if user is None:
|
||||
|
@ -1201,13 +1201,13 @@ The application uses the cache as the main data store, reading and writing data
|
|||
|
||||
Application code:
|
||||
|
||||
```
|
||||
```python
|
||||
set_user(12345, {"foo":"bar"})
|
||||
```
|
||||
|
||||
Cache code:
|
||||
|
||||
```
|
||||
```python
|
||||
def set_user(user_id, values):
|
||||
user = db.query("UPDATE Users WHERE id = {0}", user_id, values)
|
||||
cache.set(user_id, user)
|
||||
|
|
|
@ -182,7 +182,7 @@ For the **Category Service**, we can seed a seller-to-category dictionary with t
|
|||
|
||||
**Clarify with your interviewer how much code you are expected to write**.
|
||||
|
||||
```
|
||||
```python
|
||||
class DefaultCategories(Enum):
|
||||
|
||||
HOUSING = 0
|
||||
|
@ -199,7 +199,7 @@ seller_category_map['Target'] = DefaultCategories.SHOPPING
|
|||
|
||||
For sellers not initially seeded in the map, we could use a crowdsourcing effort by evaluating the manual category overrides our users provide. We could use a heap to quickly look up the top manual override per seller in O(1) time.
|
||||
|
||||
```
|
||||
```python
|
||||
class Categorizer(object):
|
||||
|
||||
def __init__(self, seller_category_map, self.seller_category_crowd_overrides_map):
|
||||
|
@ -219,7 +219,7 @@ class Categorizer(object):
|
|||
|
||||
Transaction implementation:
|
||||
|
||||
```
|
||||
```python
|
||||
class Transaction(object):
|
||||
|
||||
def __init__(self, created_at, seller, amount):
|
||||
|
@ -232,7 +232,7 @@ class Transaction(object):
|
|||
|
||||
To start, we could use a generic budget template that allocates category amounts based on income tiers. Using this approach, we would not have to store the 100 million budget items identified in the constraints, only those that the user overrides. If a user overrides a budget category, we could store the override in the `TABLE budget_overrides`.
|
||||
|
||||
```
|
||||
```python
|
||||
class Budget(object):
|
||||
|
||||
def __init__(self, income):
|
||||
|
@ -273,7 +273,7 @@ user_id timestamp seller amount
|
|||
|
||||
**MapReduce** implementation:
|
||||
|
||||
```
|
||||
```python
|
||||
class SpendingByCategory(MRJob):
|
||||
|
||||
def __init__(self, categorizer):
|
||||
|
|
|
@ -130,7 +130,7 @@ To generate the unique url, we could:
|
|||
* Base 64 is another popular encoding but causes issues for URLs because of the additional `+` and `/` characters
|
||||
* The following [Base 62 pseudocode](http://stackoverflow.com/questions/742013/how-to-code-a-url-shortener) runs in O(k) time where k is the number of digits = 7:
|
||||
|
||||
```
|
||||
```python
|
||||
def base_encode(num, base=62):
|
||||
digits = []
|
||||
while num > 0
|
||||
|
@ -142,7 +142,7 @@ def base_encode(num, base=62):
|
|||
|
||||
* Take the first 7 characters of the output, which results in 62^7 possible values and should be sufficient to handle our constraint of 360 million shortlinks in 3 years:
|
||||
|
||||
```
|
||||
```python
|
||||
url = base_encode(md5(ip_address+timestamp))[:URL_LENGTH]
|
||||
```
|
||||
|
||||
|
|
|
@ -97,7 +97,7 @@ The cache can use a doubly-linked list: new items will be added to the head whil
|
|||
|
||||
**Query API Server** implementation:
|
||||
|
||||
```
|
||||
```python
|
||||
class QueryApi(object):
|
||||
|
||||
def __init__(self, memory_cache, reverse_index_service):
|
||||
|
@ -121,7 +121,7 @@ class QueryApi(object):
|
|||
|
||||
**Node** implementation:
|
||||
|
||||
```
|
||||
```python
|
||||
class Node(object):
|
||||
|
||||
def __init__(self, query, results):
|
||||
|
@ -131,7 +131,7 @@ class Node(object):
|
|||
|
||||
**LinkedList** implementation:
|
||||
|
||||
```
|
||||
```python
|
||||
class LinkedList(object):
|
||||
|
||||
def __init__(self):
|
||||
|
@ -150,7 +150,7 @@ class LinkedList(object):
|
|||
|
||||
**Cache** implementation:
|
||||
|
||||
```
|
||||
```python
|
||||
class Cache(object):
|
||||
|
||||
def __init__(self, MAX_SIZE):
|
||||
|
|
|
@ -102,7 +102,7 @@ We'll use a multi-step **MapReduce**:
|
|||
* **Step 1** - Transform the data to `(category, product_id), sum(quantity)`
|
||||
* **Step 2** - Perform a distributed sort
|
||||
|
||||
```
|
||||
```python
|
||||
class SalesRanker(MRJob):
|
||||
|
||||
def within_past_week(self, timestamp):
|
||||
|
|
|
@ -62,7 +62,7 @@ Handy conversion guide:
|
|||
|
||||
Without the constraint of millions of users (vertices) and billions of friend relationships (edges), we could solve this unweighted shortest path task with a general BFS approach:
|
||||
|
||||
```
|
||||
```python
|
||||
class Graph(Graph):
|
||||
|
||||
def shortest_path(self, source, dest):
|
||||
|
@ -117,7 +117,7 @@ We won't be able to fit all users on the same machine, we'll need to [shard](htt
|
|||
|
||||
**Lookup Service** implementation:
|
||||
|
||||
```
|
||||
```python
|
||||
class LookupService(object):
|
||||
|
||||
def __init__(self):
|
||||
|
@ -132,7 +132,7 @@ class LookupService(object):
|
|||
|
||||
**Person Server** implementation:
|
||||
|
||||
```
|
||||
```python
|
||||
class PersonServer(object):
|
||||
|
||||
def __init__(self):
|
||||
|
@ -151,7 +151,7 @@ class PersonServer(object):
|
|||
|
||||
**Person** implementation:
|
||||
|
||||
```
|
||||
```python
|
||||
class Person(object):
|
||||
|
||||
def __init__(self, id, name, friend_ids):
|
||||
|
@ -162,7 +162,7 @@ class Person(object):
|
|||
|
||||
**User Graph Service** implementation:
|
||||
|
||||
```
|
||||
```python
|
||||
class UserGraphService(object):
|
||||
|
||||
def __init__(self, lookup_service):
|
||||
|
|
|
@ -100,7 +100,7 @@ We could store `links_to_crawl` and `crawled_links` in a key-value **NoSQL Datab
|
|||
|
||||
`PagesDataStore` is an abstraction within the **Crawler Service** that uses the **NoSQL Database**:
|
||||
|
||||
```
|
||||
```python
|
||||
class PagesDataStore(object):
|
||||
|
||||
def __init__(self, db);
|
||||
|
@ -134,7 +134,7 @@ class PagesDataStore(object):
|
|||
|
||||
`Page` is an abstraction within the **Crawler Service** that encapsulates a page, its contents, child urls, and signature:
|
||||
|
||||
```
|
||||
```python
|
||||
class Page(object):
|
||||
|
||||
def __init__(self, url, contents, child_urls, signature):
|
||||
|
@ -146,7 +146,7 @@ class Page(object):
|
|||
|
||||
`Crawler` is the main class within **Crawler Service**, composed of `Page` and `PagesDataStore`.
|
||||
|
||||
```
|
||||
```python
|
||||
class Crawler(object):
|
||||
|
||||
def __init__(self, data_store, reverse_index_queue, doc_index_queue):
|
||||
|
@ -187,7 +187,7 @@ We'll want to remove duplicate urls:
|
|||
* For smaller lists we could use something like `sort | unique`
|
||||
* With 1 billion links to crawl, we could use **MapReduce** to output only entries that have a frequency of 1
|
||||
|
||||
```
|
||||
```python
|
||||
class RemoveDuplicateUrls(MRJob):
|
||||
|
||||
def mapper(self, _, line):
|
||||
|
|
Loading…
Reference in New Issue