Respuesta :
Answer:
# Python script for parsing an access log.
# The program assumes that every log entry follows the Apache Common Log Format.
# An example of an entry in that format is given below:
#127.0.0.1 - frank [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326
import requests
# Download the access log and store it locally so it can be parsed afterwards.
url = "https://s3.amazonaws.com/tcmg476/http_access_log"
# stream=True lets the body be consumed chunk by chunk instead of all at once.
r = requests.get(url, stream=True)
# Stop on an HTTP error rather than saving an error page as if it were the log.
r.raise_for_status()
with open("python.txt", "wb") as textfile:
    for chunk in r.iter_content(chunk_size=1024):
        # Write one chunk at a time to the text file; empty chunks are skipped.
        if chunk:
            textfile.write(chunk)
# Accumulator for every statistic collected while scanning the log.
result = {
    # Overall number of counted requests.
    "total_requests": 0,
    # Request counts keyed by day, by week label and by month.
    "per_day_data": {},
    "per_week_data": {},
    "per_month_data": {},
    # Counters driven by the first digit of the status code.
    "request_not_successful": 0,
    "requests_redirected_elsewhere": 0,
    # Number of requests seen for each requested file.
    "filewise_request_frequency": {},
    # [highest request count, every file that reached that count]
    "most_requested_file": [0, []],
    # [lowest request count, every file that reached that count]
    "least_requested_file": [0, []],
}
# Scan the downloaded log once, updating the counters in `result` and copying
# each counted line into a file named after its month (e.g. "Oct2000.txt").
date_day = None    # date string of the most recently seen day
days = 0           # number of day changes seen so far
week = None        # label of the current 7-day bucket (the bucket's first day)
months_done = []   # months whose output file has already been created
month_files = {}   # month key (e.g. "Oct/2000") -> open per-month output file
try:
    with open("python.txt") as file:
        for line in file:
            # Shorter lines are treated as malformed entries and skipped.
            if len(line) < 56:
                continue
            data = line.split()
            # data[6] (requested file) and data[-2] (status code) must exist.
            if len(data) < 7:
                continue
            # data[3] looks like "[10/Oct/2000:13:55:36"; keep "10/Oct/2000".
            day = data[3][1:].split(':')[0]
            date_parts = day.split('/')
            if len(date_parts) != 3:
                # Not a day/month/year date, so the line cannot be bucketed.
                continue

            result["total_requests"] += 1

            if day != date_day:
                date_day = day
                # Open a new week bucket on the 1st, 8th, 15th, ... day so
                # that no request is ever counted under a None week.
                if days % 7 == 0:
                    week = date_day
                days += 1

            # The first request of a bucket counts as 1, not 0.
            per_day = result["per_day_data"]
            per_day[day] = per_day.get(day, 0) + 1

            per_week = result["per_week_data"]
            per_week[week] = per_week.get(week, 0) + 1

            month = date_parts[1] + "/" + date_parts[2]
            per_month = result["per_month_data"]
            per_month[month] = per_month.get(month, 0) + 1

            # Keep one handle per month so a line always lands in the file of
            # its own month, even if months are interleaved in the log.
            if month not in month_files:
                file_name = date_parts[1] + date_parts[2]
                month_files[month] = open(file_name + ".txt", 'w')
                print(file_name)
                months_done.append(month)
            month_files[month].write(line)

            # The status code is the second-to-last field of the entry.
            status = data[-2]
            if status[0] == "4":
                result["request_not_successful"] += 1
            if status[0] == "3":
                result["requests_redirected_elsewhere"] += 1

            frequency = result["filewise_request_frequency"]
            frequency[data[6]] = frequency.get(data[6], 0) + 1
finally:
    # Close every per-month file, even if parsing stopped with an error.
    for month_file in month_files.values():
        month_file.close()
# Work out the most and least requested files, keeping every file that ties
# for the extreme count exactly once.
frequency = result["filewise_request_frequency"]
# default=0 keeps the [0, []] placeholders when no request was counted, and
# removes the need to seed the search with a file that may not be in the log.
maxm = max(frequency.values(), default=0)
minm = min(frequency.values(), default=0)
maxlist = [name for name, count in frequency.items() if count == maxm]
minlist = [name for name, count in frequency.items() if count == minm]
result["most_requested_file"] = [maxm, maxlist]
result["least_requested_file"] = [minm, minlist]
print(result)
Explanation: