mirror of
https://github.com/donnemartin/system-design-primer.git
synced 2025-12-14 17:08:56 +03:00
porting to noat.cards
This commit is contained in:
@@ -3,44 +3,44 @@
|
||||
from mrjob.job import MRJob
|
||||
|
||||
|
||||
class HitCounts(MRJob):
|
||||
class HitCounts(MRJob) :
|
||||
|
||||
def extract_url(self, line):
|
||||
def extract_url(self, line) :
|
||||
"""Extract the generated url from the log line."""
|
||||
pass
|
||||
|
||||
def extract_year_month(self, line):
|
||||
def extract_year_month(self, line) :
|
||||
"""Return the year and month portions of the timestamp."""
|
||||
pass
|
||||
|
||||
def mapper(self, _, line):
|
||||
def mapper(self, _, line) :
|
||||
"""Parse each log line, extract and transform relevant lines.
|
||||
|
||||
Emit key value pairs of the form:
|
||||
|
||||
(2016-01, url0), 1
|
||||
(2016-01, url0), 1
|
||||
(2016-01, url1), 1
|
||||
(2016-01, url0) , 1
|
||||
(2016-01, url0) , 1
|
||||
(2016-01, url1) , 1
|
||||
"""
|
||||
url = self.extract_url(line)
|
||||
period = self.extract_year_month(line)
|
||||
yield (period, url), 1
|
||||
url = self.extract_url(line)
|
||||
period = self.extract_year_month(line)
|
||||
yield (period, url) , 1
|
||||
|
||||
def reducer(self, key, values):
|
||||
def reducer(self, key, values) :
|
||||
"""Sum values for each key.
|
||||
|
||||
(2016-01, url0), 2
|
||||
(2016-01, url1), 1
|
||||
(2016-01, url0) , 2
|
||||
(2016-01, url1) , 1
|
||||
"""
|
||||
yield key, sum(values)
|
||||
yield key, sum(values)
|
||||
|
||||
def steps(self):
|
||||
def steps(self) :
|
||||
"""Run the map and reduce steps."""
|
||||
return [
|
||||
self.mr(mapper=self.mapper,
|
||||
reducer=self.reducer)
|
||||
reducer=self.reducer)
|
||||
]
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
HitCounts.run()
|
||||
HitCounts.run()
|
||||
|
||||
Reference in New Issue
Block a user