porting to noat.cards

This commit is contained in:
Vu
2021-03-14 17:08:05 +07:00
parent 6984b4e956
commit f4af06bdff
48 changed files with 3545 additions and 3384 deletions

View File

@@ -3,44 +3,44 @@
from mrjob.job import MRJob
class HitCounts(MRJob):
class HitCounts(MRJob) :
def extract_url(self, line):
def extract_url(self, line) :
"""Extract the generated url from the log line."""
pass
def extract_year_month(self, line):
def extract_year_month(self, line) :
"""Return the year and month portions of the timestamp."""
pass
def mapper(self, _, line):
def mapper(self, _, line) :
"""Parse each log line, extract and transform relevant lines.
Emit key value pairs of the form:
(2016-01, url0), 1
(2016-01, url0), 1
(2016-01, url1), 1
(2016-01, url0) , 1
(2016-01, url0) , 1
(2016-01, url1) , 1
"""
url = self.extract_url(line)
period = self.extract_year_month(line)
yield (period, url), 1
url = self.extract_url(line)
period = self.extract_year_month(line)
yield (period, url) , 1
def reducer(self, key, values):
def reducer(self, key, values) :
"""Sum values for each key.
(2016-01, url0), 2
(2016-01, url1), 1
(2016-01, url0) , 2
(2016-01, url1) , 1
"""
yield key, sum(values)
yield key, sum(values)
def steps(self):
def steps(self) :
"""Run the map and reduce steps."""
return [
self.mr(mapper=self.mapper,
reducer=self.reducer)
reducer=self.reducer)
]
if __name__ == '__main__':
HitCounts.run()
HitCounts.run()