实例展示 =========== 可用实例:: 统计过去1小时发生Error最多的3个位置: severity: ERROR | select module ,count(*) as count GROUP BY module ORDER BY count DESC LIMIT 30 统计过去15分钟各种日志级别产生的日志条数: * | select severity ,count(*) as count GROUP BY severity ORDER BY count DESC 对比不同时间段的聚类日志数量: * | select v.signature, v.pattern, coalesce(v.cmp[1],0) as count_now, coalesce(v.cmp[2],0) as count_before, coalesce(v.cmp[1],0) - coalesce(v.cmp[2],0) as count_diff from ( select p.signature as signature , arbitrary(p.pattern) as pattern , compare(p.count,300) as cmp from (select log_reduce() as p from log) group by p.signature )v order by count_diff desc Nginx日志分析:: 按小时统计PV/UV统计(pv_uv): * | select approx_distinct("__tag__:__client_ip__") as uv , count(1) as pv , date_format(date_trunc('hour', __time__), '%m-%d %H:%i') as time group by date_format(date_trunc('hour', __time__), '%m-%d %H:%i') order by time limit 1000 访问地域分析(ip_distribution): * | select ip_to_province(client_ip) as address, count(1) as c group by ip_to_province(client_ip) order by c desc limit 100 * | select ip_to_geo("c-ip") as country, count(1) as c group by ip_to_geo("c-ip") limit 100 访问前十地址(top_page): * | select split_part(request_uri,'?',1) as path, count(1) as pv group by split_part(request_uri,'?',1) order by pv desc limit 10 请求方法占比(http_method_percentage): * | select request_method, count(1) as pv group by request_method order by pv DESC 请求状态占比(http_status_percentage): * | select status, count(1) as pv group by status order by pv DESC 请求UA占比(user_agent): * | select count(1) as pv, case when http_user_agent like '%Chrome%' then 'Chrome' when http_user_agent like '%Firefox%' then 'Firefox' when http_user_agent like '%Safari%' then 'Safari' else 'unKnown' end as http_user_agent group by http_user_agent order by pv desc limit 10 前十访问来源(top_10_referer): * | select http_referer, count(1) as pv group by http_referer order by pv desc limit 10 浏览流入流出统计(net_in_net_out): *| select sum("body_bytes_sent") as net_out, sum("request_length") as net_in , date_format(date_trunc('hour', __time__), '%m-%d %H:%i') as time group by date_format(date_trunc('hour', __time__), '%m-%d %H:%i') order by time limit 10000 Nginx访问日志诊断及优化:: 统计平均延时和最大延时: * | select from_unixtime(__time__ -__time__% 300) as time, avg(request_time) as avg_latency , max(request_time) as max_latency group by __time__ -__time__% 300 统计最大延时对应的请求页面: * | select from_unixtime(__time__ - __time__% 60) , max_by(request_uri,request_time) group by __time__ - __time__%60 统计请求延时的分布(分为10个桶进行近似分布直方统计): * |select numeric_histogram(10,request_time) 统计最大的十个延时: * | select max(request_time,10) 对延时最大的页面调优: request_uri:"/url2" | select count(1) as pv, approx_distinct(remote_addr) as uv, histogram(method) as method_pv, histogram(status) as status_pv, histogram(user_agent) as user_agent_pv, avg(request_time) as avg_latency, max(request_time) as max_latency 其他实例:: 统计过去1小时;登录次数最多的三个用户: login | SELECT regexp_extract(message, 'userID=(?[a-zA-Z\d]+)', 1) AS userID, count(*) as count GROUP BY userID ORDER BY count DESC LIMIT 3 统计过去15分钟;每个用户的付款总额: order | SELECT regexp_extract(message, 'userID=(?[a-zA-Z\d]+)', 1) AS userID, sum(cast(regexp_extract(message, 'amount=(?[a-zA-Z\d]+)', 1) AS double)) AS amount GROUP BY userID 在整个公司的人员中;获取每个人的薪水在部门内排名 * | select department, persionId, sallary , rank() over(PARTITION BY department order by sallary desc) as sallary_rank order by department,sallary_rank 在整个公司的人员中;获取每个人的薪水在部门内的占比 * | select department, persionId, sallary *1.0 / sum(sallary) over(PARTITION BY department ) as sallary_percentage 按天统计;获取每天UV相对前一天的增长情况 * | select day ,uv, uv *1.0 /(lag(uv,1,0) over() ) as diff_percentage from ( select approx_distinct(ip) as uv, date_trunc('day',__time__) as day from log group by day order by day asc ) 统计每小时的PV、UV和最高延时对应的用户请求,延时最高的10个延时: *|select date_trunc('hour',from_unixtime(__time__)) as time, count(1) as pv, approx_distinct(userid) as uv, max_by(url,latency) as top_latency_url, max(latency,10) as top_10_latency group by 1 order by time