本文 |
# Split the Apache access log into one file per day.
# Create a working directory (rename as needed) and move into it.
# -p keeps a re-run from failing when the directory already exists.
mkdir -p work
# Stop here if the directory is unusable; otherwise every output file below
# would be written into whatever directory we happen to be in.
cd work || exit 1
# Split an Apache access log into one file per calendar day.
#
# Usage:   split_access_log_by_date [FILE...]
#          Reads stdin when no FILE is given; "-" also means stdin.
# Env:     ACCESS_LOG_PATH_FIELD    whitespace field holding the request path
#                                   (default 8)
#          ACCESS_LOG_STATUS_FIELD  whitespace field holding the HTTP status
#                                   (default 10)
# Outputs: appends every kept line to ./access_log.YYYY-MM-DD, where the date
#          comes from the "[dd/Mon/yyyy" timestamp found in field 4.
# Returns: the exit status of awk.
#
# NOTE(review): with the stock common/combined LogFormat the path and status
# are fields 7 and 9; the defaults 8 and 10 are kept from the original
# one-liner and presumably match a custom LogFormat -- confirm for your logs.
split_access_log_by_date() {
  awk -v path_field="${ACCESS_LOG_PATH_FIELD:-8}" \
    -v status_field="${ACCESS_LOG_STATUS_FIELD:-10}" '
    BEGIN {
      # Month abbreviation -> two-digit month number.
      m["Jan"] = "01"; m["Feb"] = "02"; m["Mar"] = "03";
      m["Apr"] = "04"; m["May"] = "05"; m["Jun"] = "06";
      m["Jul"] = "07"; m["Aug"] = "08"; m["Sep"] = "09";
      m["Oct"] = "10"; m["Nov"] = "11"; m["Dec"] = "12";
    }

    # Drop requests for images, css, scripts and robots.txt ...
    $path_field ~ /(\.(css|js|png|jpg|gif|ico)|robots\.txt)/ { next }
    # ... and 4xx / 5xx responses.
    $status_field ~ /^(4|5)/ { next }

    # Two-argument match() plus split() replaces the gawk-only three-argument
    # match(), so this also runs under mawk and BSD awk.
    match($4, "\\[../[^/]+/....") {
      split(substr($4, RSTART + 1, RLENGTH - 1), d, "/")  # dd, Mon, yyyy
      out = "access_log." d[3] "-" m[d[2]] "-" d[1]
      # Keep a single output file open at a time so that long logs cannot
      # exhaust file descriptors; ">>" reopens in append mode if a date
      # shows up again later.
      if (out != prev) {
        if (prev != "") close(prev)
        prev = out
      }
      print $0 >> out
    }
  ' "$@"
}

split_access_log_by_date /var/log/httpd/access_log

# To process the rotated, gzip-compressed logs together with the current one:
#zcat /var/log/httpd/access_log*.gz | split_access_log_by_date - /var/log/httpd/access_log
# Number of log lines per day.
#
# Prints one "YYYY-MM-DD<TAB>count" row for every access_log.* file in the
# current directory; prints nothing when there are none.
daily_access_counts() {
  local log_file line_count
  # Let the shell glob instead of parsing the output of ls.
  for log_file in access_log.*; do
    # An unmatched glob is left as the literal pattern; skip it.
    [ -f "$log_file" ] || continue
    line_count=$(wc -l < "$log_file") || continue
    # $(( )) strips the padding some wc implementations put before the number.
    printf '%s\t%s\n' "${log_file#access_log.}" "$((line_count))"
  done
}

daily_access_counts > access-daily.tsv
# Hit count per request path, most requested first.
#
# Usage:   rank_paths_by_hits [FILE...]   (reads stdin when no FILE is given)
# Outputs: "count<TAB>path" rows on stdout.
rank_paths_by_hits() {
  # Splitting on double quotes makes $2 the request line ("METHOD PATH PROTO");
  # its second word is the path. Requests without a path (e.g. "-") are
  # skipped rather than being counted under whatever follows the quote.
  awk -F '"' '{ if (split($2, req, " ") >= 2) print req[2] }' "$@" \
    | sort \
    | uniq -c \
    | sort -nr \
    | awk '{ print $1 "\t" $2 }' # concatenate: commas would add spaces around the tab
}

rank_paths_by_hits access_log.* > access-path-rank.tsv
# Extract only the error cause from the error log and remove duplicates.
#
# Usage:   unique_error_causes [FILE...]   (reads stdin when no FILE is given)
# Outputs: each distinct message once, sorted, on stdout.
unique_error_causes() {
  # Strip everything up to and including the last "[client ...] " tag; lines
  # without such a tag are passed through unchanged. "[^]]" (with "]" placed
  # first) is the portable way to write "any character except ]".
  awk '{ sub(/.+\[client [^]]+\] /, ""); print }' "$@" | sort -u
}

unique_error_causes /var/log/httpd/ssl_error_log
|