#!/usr/local/bin/ruby -w

# Log file parsers
# Shared base for the log parsers: remembers the file to read, the list of
# queries collected so far, and whether duration data has been seen.
class GenericLog
	attr_reader :time_to_parse
	attr_accessor :includes_duration, :queries

	# filename - path of the log file; stored for use by a subclass's #parse.
	def initialize(filename)
		@includes_duration = false
		@queries = []
		@filename = filename
	end

	# Placeholder only: the base class always raises.
	def parse
		raise "Override this"
	end

	# Calls #normalize on every collected query; returns the query list.
	def normalize
		@queries.each do |query|
			query.normalize
		end
	end
end

# Wraps one raw syslog line. When the line carries a PostgreSQL entry it
# exposes what Syslog#parse needs: a grouping id taken from the leading
# "[...]" tag, whether the line starts a new statement, and the query text.
class SyslogLine
	DASH_REGEX = Regexp.new("-")
	SPACE_REGEX = Regexp.new(" ")
	PG_REGEX = Regexp.new("postgres")
	attr_accessor :connection_id, :is_pg_log_entry

	# data - one raw line of the syslog file.
	#
	# is_pg_log_entry becomes true only when the text after the first
	# "postgres" (minus its first space-delimited token) starts with a
	# complete "[...]" tag. connection_id is the tag content up to its first
	# "-", or the whole content when the tag holds no dash.
	def initialize(data)
		@is_pg_log_entry = false
		@text = nil
		@right_bracket = nil
		@connection_id = nil
		return if !(data =~ PG_REGEX)
		# Split at the FIRST "postgres" only: an unlimited split cut the
		# statement short whenever the query itself mentioned "postgres".
		after_tag = data.split(PG_REGEX, 2)[1].to_s
		# Drop the first space-delimited token; to_s covers lines that have
		# nothing left after it.
		@text = after_tag.split(SPACE_REGEX, 2)[1].to_s.strip
		return if @text[0, 1] != "["
		@right_bracket = @text.index("]")
		# An unterminated tag cannot be parsed; treat it as a non-PG line.
		return if @right_bracket.nil?
		@is_pg_log_entry = true
		# Look for the dash strictly inside the brackets.
		inside = @text[1, @right_bracket - 1]
		dash = inside =~ DASH_REGEX
		@connection_id = dash ? inside[0, dash] : inside
	end

	# True when "LOG: " starts two characters after the closing bracket.
	# Returns false for lines that were not recognised as PostgreSQL entries.
	def is_new_query
		return false if @right_bracket.nil?
		@text[@right_bracket + 2, 5] == "LOG: "
	end

	# Returns the query text carried by this line with every literal "^I"
	# removed, or "" when there is none.
	def parse_query_segment
		return "" if @right_bracket.nil?
		# 15 skips "] " plus a 13-character prefix (presumably "LOG:  query: ");
		# 2 skips just "] " on continuation lines.
		offset = @right_bracket + (is_new_query ? 15 : 2)
		# Take everything to the end of the line (no fixed length cap).
		tmp = @text[offset..-1]
		return "" if tmp.nil?
		tmp.gsub(/\^I/, "").chomp
	end

	# The extracted entry text ("" when the line held no PostgreSQL entry).
	def to_s
		@text.to_s
	end
end

# Parser for PostgreSQL entries that were written through syslog.
class Syslog < GenericLog
	DEBUG=false

	# Reads @filename line by line, groups lines by SyslogLine#connection_id
	# and appends each finished Query to @queries. Queries still open at end
	# of file are appended too. Stores the elapsed wall-clock time in
	# @time_to_parse and returns it.
	def parse
		start = Time.new
		working = {}
		File.foreach(@filename) {|text|
			line = SyslogLine.new(text)
			next if !line.is_pg_log_entry
			puts line if DEBUG
			if line.is_new_query
				# if there's a query in that slot presently, close it out
				finished = working.delete(line.connection_id)
				if finished
					@queries << finished
					# Interpolate: String + Integer raises TypeError.
					puts "Closed out a query, so far there are #{@queries.size}" if DEBUG
				end
				# put the query in the working table
				working[line.connection_id] = Query.new
				puts "Started a query" if DEBUG
			end
			current = working[line.connection_id]
			# A continuation whose first line was never seen (e.g. the log
			# starts mid-statement) has nothing to attach to; skip it.
			next if current.nil?
			segment = line.parse_query_segment
			current.append(segment)
			puts "Added to a working query: " + segment if DEBUG
		}
		working.each_value {|q| @queries << q }
		@time_to_parse = Time.new - start
	end
end

# One line of a PostgreSQL log file, classified as the start of a query, a
# continuation (leading tab) or a duration line.
class PostgresLogLine
	# Regexp literals are used so that \s really means "any whitespace": in a
	# double-quoted string "\s" is just a space character.
	START_QUERY_LINE=/^LOG:\s*query:/
	DURATION_LINE=/^LOG:\s*duration:/
	STARTS_WITH_DATE=Regexp.new("^[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9] ")
	attr_reader :text, :is_new_query

	# text - one raw log line. When it starts with a YYYY-MM-DD date, the
	# first two whitespace-separated tokens are dropped and the remaining
	# tokens are re-joined with single spaces.
	def initialize(text)
		if STARTS_WITH_DATE.match(text)
			# "|| []" covers a line with fewer than two tokens after the split.
			@text = (text.split(" ")[2..-1] || []).join(" ").strip
		else
			@text = text
		end
		# A new query needs the "LOG: query:" prefix AND some text after it.
		@is_new_query = !START_QUERY_LINE.match(@text).nil? && @text.split(/LOG:\s*query: /).size > 1
	end

	# True when the line starts with a tab.
	def is_continuation
		!/^\t/.match(@text).nil?
	end

	# True when the line starts with "LOG: duration:".
	def is_duration_line
		!DURATION_LINE.match(@text).nil?
	end

	# Returns the first token after "duration:" as a Float; 0.0 when the line
	# is not a duration line or carries no value.
	def parse_duration_segment
		found = DURATION_LINE.match(@text)
		return 0.0 if found.nil?
		found.post_match.strip.split(" ")[0].to_s.to_f
	end

	# Returns the query text on this line: whatever follows "query:" on a new
	# query line, otherwise the line with all tabs removed.
	def parse_query_segment
		if @is_new_query
			return START_QUERY_LINE.match(@text).post_match.strip
		end
		@text.gsub(/\t/, "").chomp
	end
end

# this is really a State machine
# Collects Query objects as PostgresLog#parse feeds it query starts,
# continuations and durations. At most one query is "current" at a time.
class Accumulator
	def initialize
		@current = nil
		@queries = []
	end

	# All queries seen so far, in order, including the one still being
	# accumulated. Without this the last query of a log that does not end in
	# a duration line was silently lost.
	def queries
		@current.nil? ? @queries : @queries + [@current]
	end

	# Starts a new current query with the given text; a query that was
	# already in progress is stored first.
	def new_query_start(txt)
		# new query starting, so put the old one in the array
		@queries << @current if !@current.nil?
		@current = Query.new(txt)
	end

	# Appends txt to the current query; ignored when no query is in progress.
	def query_continuation(txt)
		@current.append(txt) if !@current.nil?
	end

	# Records the duration of the current query and closes it; ignored when
	# no query is in progress.
	def set_duration(d)
		if !@current.nil?
			@current.duration = d
			@queries << @current
			@current = nil
		end
	end
end

# Parser for a plain PostgreSQL log file.
class PostgresLog < GenericLog
	DEBUG=false
	# A "query:" line whose statement starts with BEGIN. The previous filter
	# (/begin/i anywhere in the line) also threw away ordinary statements and
	# continuation lines that merely contained "begin", e.g. a begin_date
	# column.
	BEGIN_STATEMENT=/query:\s*begin\b/i

	# Reads @filename line by line, feeding query starts, continuations and
	# durations to an Accumulator. Sets @includes_duration when a duration
	# line is seen, stores the collected queries in @queries and the elapsed
	# wall-clock time in @time_to_parse (which is also the return value).
	def parse
		start = Time.new
		a = Accumulator.new
		File.foreach(@filename) {|text|
			puts text if DEBUG
			next if text =~ BEGIN_STATEMENT
			line = PostgresLogLine.new(text)
			if line.is_new_query
				a.new_query_start(line.parse_query_segment)
			elsif line.is_continuation
				a.query_continuation(line.parse_query_segment)
			elsif line.is_duration_line
				@includes_duration = true
				a.set_duration(line.parse_duration_segment)
			end
		}
		@queries = a.queries
		@time_to_parse = Time.new - start
	end
end

# Query model
# A single SQL statement taken from a log, with an optional duration.
class Query
	REMOVE_STUFF_BTWN_SINGLE_QUOTES_REGEXP = Regexp.new("'[^']*'")
	attr_reader :text
	attr_accessor :duration

	# text - initial statement text. A private copy is kept because #append
	# and #normalize modify the text in place; without the copy they would
	# change the caller's string (and fail outright on a frozen one).
	def initialize(text="")
		@text = text.dup
		@duration = nil
	end

	# Appends txt to the statement, separated by a single space.
	def append(txt)
		@text << " " << txt
	end

	# Replaces every single-quoted literal with '', collapses runs of spaces
	# and trims the ends, so statements differing only in literal values
	# compare equal as text. Returns self.
	def normalize
		@text.gsub!(REMOVE_STUFF_BTWN_SINGLE_QUOTES_REGEXP, "''")
		@text.squeeze!(" ")
		@text.strip!
		self
	end

	def to_s
		@text
	end

	def is_select
		check(/^SELECT/i)
	end

	def is_delete
		check(/^DELETE/i)
	end

	def is_insert
		check(/^INSERT/i)
	end

	def is_update
		check(/^UPDATE/i)
	end

	# True when regexp matches the statement text with surrounding
	# whitespace removed.
	def check(regexp)
		!regexp.match(@text.strip).nil?
	end
end

# Reports 
# Builds the plain-text output from a list of reports.
class TextReportAggregator
	# Returns the #text of every report, concatenated in the given order.
	def create(reports)
		reports.inject("") {|combined, report| combined << report.text }
	end
end

# Builds one HTML page from a list of reports: a style sheet, a table of
# links to the reports, then each report's own HTML.
class HTMLReportAggregator
	# reports - objects responding to #title and #html.
	# Returns the complete page as a String.
	def create(reports)
		# One timestamp so the <title> and the heading always agree.
		generated = Time.now
		rpt = "<html><head>".dup
		# Append the style sheet; assigning it here used to discard the
		# opening <html><head> tags.
		rpt << <<EOS
<style type="text/css">
body { background-color:white; }
h2 { text-align:center; }
h3 { color:blue }
p, td, th { font-family:Courier, Arial, Helvetica, sans-serif; font-size:14px; }
th { color:white; background-color:#7B8CBE; }
span.keyword { color:blue; }
</style>
EOS
		# Disabled style rule, kept for reference: tr { background-color:#E1E8FD; }
		rpt << "<title>SQL Query Analysis (generated #{generated})</title></head><body>\n"
		rpt << "<h2>SQL Query Analysis (generated #{generated})</h2><br>\n"
		rpt << "<hr><center>"
		rpt << "<table><tr><th>Reports</th></tr>"
		reports.each_index {|x|
			link = "<a href=\"#report#{x}\">#{reports[x].title}</a>"
			rpt << "<tr><td>#{link}</td></tr>"
		}
		rpt << "</table>"
		rpt << "<hr></center>"
		reports.each_index {|x|
			# Anchor targeted by the link table above.
			rpt << "<a name=\"report#{x}\"> </a>"
			rpt << reports[x].html
		}
		rpt << "</body></html>\n"
	end
end

# Base class for the reports: keeps the parsed log and offers a few
# formatting helpers.
class GenericReport
	# Highlighted words. Upper-case forms plus the three lower-case ones the
	# original list contained; multi-word entries come first.
	KEYWORD_REGEX = /\b(?:INSERT INTO|ORDER BY|GROUP BY|SELECT|VALUES|OFFSET|WHERE|LIMIT|FROM|DESC|ASC|AND|AS|select|from|where)\b/

	def initialize(log)
		@log = log
	end

	# Returns txt as HTML with SQL keywords wrapped in
	# <span class='keyword'>. The text is escaped first (it comes from a log
	# file and may contain <, > or &). Keywords are matched as whole words in
	# a single pass, so "AS" is no longer highlighted inside "ASC" or
	# "LAST_NAME" and spans are never nested.
	def colorize(txt)
		escaped = txt.gsub("&", "&amp;").gsub("<", "&lt;").gsub(">", "&gt;")
		escaped.gsub(KEYWORD_REGEX) {|w| "<span class='keyword'>#{w}</span>" }
	end

	def title
		"Unnamed report"
	end

	# Returns a / b rounded to two decimal places (0.0 when b is zero).
	# The division is done in floating point; with two Integers it used to
	# truncate before rounding.
	def pctg_of(a,b)
		return 0.0 if b == 0
		((a.to_f / b) * 100.0).round / 100.0
	end

	# Rounds x to the given number of decimal places.
	def round(x, places)
		# The scale is 10 to the power of places, not 10 times places.
		factor = 10.0 ** places
		(x * factor).round / factor
	end
end

# Summary figures for the whole log: query counts, total / longest /
# shortest duration (when the log has durations) and the parse time.
class OverallStatsReport < GenericReport
	def html
		rpt = "<h3>#{title}</h3>\n"
		rpt << "#{@log.queries.size} queries\n"
		rpt << "<br>#{unique_count} unique queries\n"
		if @log.includes_duration
			rpt << "<br>Total query duration was #{round(total_duration, 2)} seconds\n"
			longest = find_longest
			rpt << "<br>Longest query (#{colorize(longest.text)}) ran in #{"%2.3f" % longest.duration} seconds\n"
			shortest = find_shortest
			rpt << "<br>Shortest query (#{colorize(shortest.text)}) ran in #{"%2.3f" % shortest.duration} seconds\n"
		end
		rpt << "<br>Log file parsed in #{"%2.1f" % @log.time_to_parse} seconds\n"
	end

	def title
		"Overall statistics"
	end

	# One-line summary. The longest duration is mentioned only when the log
	# has duration data, and it is the duration that is printed (the Query
	# object itself used to be interpolated, which printed its SQL text).
	def text
		details = "#{unique_count} unique"
		details << ", longest ran in #{find_longest.duration} seconds" if @log.includes_duration
		"#{@log.queries.size} queries (#{details}) parsed in #{@log.time_to_parse} seconds\n"
	end

	# Sum of all known durations; queries without a duration count as 0.
	def total_duration
		tot = 0.0
		@log.queries.each {|x| tot += x.duration.nil? ? 0 : x.duration }
		tot
	end

	# The timed query with the smallest duration, or a "No queries found"
	# placeholder when no query has a duration. No sentinel is used for the
	# comparison, so queries slower than 10000 seconds are handled too.
	def find_shortest
		best = nil
		@log.queries.each {|x|
			next if x.duration.nil?
			best = x if best.nil? || x.duration < best.duration
		}
		best || placeholder(10000.0)
	end

	# The timed query with the largest duration, or a "No queries found"
	# placeholder when no query has a duration. A 0.0-second query is a valid
	# result.
	def find_longest
		best = nil
		@log.queries.each {|x|
			next if x.duration.nil?
			best = x if best.nil? || x.duration > best.duration
		}
		best || placeholder(0.0)
	end

	private

	# Number of distinct statement texts. Query defines no eql?/hash, so
	# calling uniq on the Query objects themselves never removed anything.
	def unique_count
		@log.queries.map {|q| q.text }.uniq.size
	end

	# Stand-in Query returned when there is nothing to report.
	def placeholder(duration)
		q = Query.new("No queries found")
		q.duration = duration
		q
	end
end

# Lists the statements that occur most often in the log.
class MostFrequentQueriesReport < GenericReport
	# log - parsed log; top - maximum number of rows to show.
	def initialize(log, top=DEFAULT_TOP)
		super(log)
		@top = top
	end

	def title
		"Most frequent queries"
	end

	def html
		list = create_report
		# The title already ends in "queries"; do not repeat the word.
		rpt = "<h3>#{title}</h3>\n"
		rpt << "<table><tr><th>Rank</th><th>Times executed</th><th>Query text</th></tr>\n"
		[list.size, @top].min.times {|x|
				rpt << "<tr><td>#{x+1}</td><td>#{list[x][1]}</td><td>#{colorize(list[x][0])}</td></tr>\n"
		}
		rpt << "</table>\n"
	end

	def text
		list = create_report
		rpt = "### " + @top.to_s + " most frequent queries\n"
		[list.size, @top].min.times {|x|
				rpt << list[x][1].to_s + " times: " + list[x][0].to_s + "\n"
		}
		return rpt
	end

	# Returns [statement text, occurrence count] pairs, most frequent first.
	def create_report
		h = Hash.new(0)
		@log.queries.each {|q| h[q.text] += 1 }
		h.sort {|a,b| b[1] <=> a[1] }
	end
end

# Running total for one statement: how many timed executions were added and
# how long they took altogether.
class LittleWrapper
	attr_accessor :total_duration, :count, :q

	# q - the query this tally is kept for.
	def initialize(q)
		@count = 0
		@total_duration = 0.0
		@q = q
	end

	# Adds one execution to the tally; a query without a duration is ignored.
	def add(q)
		elapsed = q.duration
		unless elapsed.nil?
			@total_duration += elapsed
			@count += 1
		end
	end
end

# Ranks statements by the total time all their executions took.
class QueriesThatTookUpTheMostTimeReport < GenericReport
	# log - parsed log; top - maximum number of rows to show.
	def initialize(log, top=DEFAULT_TOP)
		super(log)
		@top = top
	end

	def title
		"Queries that took up the most time"
	end

	def html
		return "No duration data available" if !@log.includes_duration
		list = create_report
		rpt = "<h3>#{title}</h3>\n"
		rpt << "<table><tr><th>Rank</th><th>Total time</th><th>Times executed</th><th>Query text</th></tr>\n"
		[list.size, @top].min.times {|x|
				rpt << "<tr><td>#{x+1}</td><td>#{"%2.3f" % list[x][1].total_duration}</td><td align=right>#{list[x][1].count}</td><td>#{colorize(list[x][0])}</td></tr>\n"
		}
		rpt << "</table>\n"
	end

	# Plain-text version of the ranking ("" when the log has no durations).
	# This used to return the placeholder "not implemented yet", which ended
	# up verbatim in the text output.
	def text
		return "" if !@log.includes_duration
		list = create_report
		rpt = "### " + @top.to_s + " queries that took up the most time\n"
		[list.size, @top].min.times {|x|
				rpt << "#{"%2.3f" % list[x][1].total_duration} seconds in #{list[x][1].count} executions: #{list[x][0]}\n"
		}
		rpt
	end

	# Returns [statement text, LittleWrapper] pairs, largest total duration
	# first.
	def create_report
		h = {}
		@log.queries.each {|q|
			h[q.text] = LittleWrapper.new(q) if !h.has_key?(q.text)
			h[q.text].add(q)
		}
		h.sort {|a,b| b[1].total_duration <=> a[1].total_duration }
	end
end

# Lists the individual executions with the largest durations.
class SlowestQueriesReport < GenericReport
	# log - parsed log; top - maximum number of rows to show.
	def initialize(log, top=DEFAULT_TOP)
		super(log)
		@top = top
	end

	# Plain-text version of the list ("" when the log has no durations).
	# This used to return the placeholder "not implemented yet", which ended
	# up verbatim in the text output.
	def text
		return "" if !@log.includes_duration
		list = create_report
		rpt = "### " + @top.to_s + " slowest queries\n"
		list.each {|q|
			rpt << "#{"%2.3f" % q.duration} seconds: #{q.text}\n"
		}
		rpt
	end

	def title
		"Slowest queries"
	end

	def html
		return "" if !@log.includes_duration
		list = create_report
		rpt = "<h3>#{title}</h3>\n"
		rpt << "<table><tr><th>Rank</th><th>Time</th><th>Query text</th></tr>\n"
		list.each_index {|x|
				rpt << "<tr><td>#{x+1}</td><td>#{"%2.3f" % list[x].duration}</td><td>#{colorize(list[x].text)}</td></tr>\n"
		}
		rpt << "</table>\n"
	end

	# Returns at most @top timed queries, slowest first. Queries without a
	# duration are left out: formatting a nil duration with "%2.3f" raises
	# TypeError.
	def create_report
		timed = @log.queries.reject {|q| q.duration.nil? }
		# slice returns nil for a negative length; fall back to an empty list.
		timed.sort {|a,b| b.duration <=> a.duration }.slice(0,@top) || []
	end
end

# Counts SELECT / INSERT / UPDATE / DELETE statements and shows each as a
# share of all queries.
class QueriesByTypeReport < GenericReport
	def title
		"Queries by type"
	end

	def html
		sel,ins,upd,del=create_report
		rpt = "<h3>#{title}</h3>\n"
		rpt << "<table><tr><th>Type</th><th>Count</th><th>Percentage</th></tr>\n"
		rpt << "<tr><td>SELECT</td><td>#{sel}</td><td align=center>#{pctg(sel)}</td></tr>\n" if sel > 0
		rpt << "<tr><td>INSERT</td><td>#{ins}</td><td align=center>#{pctg(ins)}</td></tr>\n" if ins > 0
		rpt << "<tr><td>UPDATE</td><td>#{upd}</td><td align=center>#{pctg(upd)}</td></tr>\n" if upd > 0
		rpt << "<tr><td>DELETE</td><td>#{del}</td><td align=center>#{pctg(del)}</td></tr>\n" if del > 0
		rpt << "</table>\n"
	end

	# Plain-text table; types with a zero count are omitted.
	def text
		sel,ins,upd,del=create_report
		# One shared width (widest count plus a space) keeps the percentage
		# column aligned; the rows used to mix their own and sel's width.
		width = [sel, ins, upd, del].map {|c| c.to_s.size }.max + 1
		rpt = "### Queries by type\n"
		[["SELECTs", sel], ["INSERTs", ins], ["UPDATEs", upd], ["DELETEs", del]].each {|label, count|
			rpt << "#{label}: #{count.to_s.ljust(width)} (#{pctg(count)}%)\n" if count > 0
		}
		return rpt
	end

	# x as a whole-number percentage of all queries (truncated, not rounded).
	def pctg(x)
		x > 0.0 ? (((x.to_f / @log.queries.size.to_f)*100).to_i) : 0
	end

	# Returns [selects, inserts, updates, deletes]; queries of any other kind
	# are not counted.
	def create_report
		sel=ins=del=upd=0
		@log.queries.each {|q|
			if q.is_select
				sel += 1
			elsif q.is_insert
				ins += 1
			elsif q.is_update
				upd += 1
			elsif q.is_delete
				del += 1
			end
		}
		[sel, ins, upd, del]
	end
end

# Number of rows shown by the "top N" reports unless -top is given.
DEFAULT_TOP=10

# Command-line entry point: parse the log named by -file and print the
# reports in the requested format.
if __FILE__ == $0
	usage = "Usage: " + $0 + " [-logtype syslog|pglog] [-top n] [-normalize] [-format text|html] -file some_log_file_name"
	# Value following flag on the command line, or fallback when the flag is
	# absent or is the last argument.
	option = lambda {|flag, fallback|
		pos = ARGV.index(flag)
		value = pos ? ARGV[pos + 1] : nil
		value.nil? ? fallback : value
	}
	filename = option.call("-file", nil)
	# Print the usage text and exit non-zero instead of raising with a
	# backtrace; this also catches "-file" given without a file name.
	abort usage if filename.nil?
	log = option.call("-logtype", "syslog") == "pglog" ? PostgresLog.new(filename) : Syslog.new(filename)
	log.parse
	log.normalize if ARGV.include?("-normalize")
	top = option.call("-top", DEFAULT_TOP).to_i
	# A missing, non-numeric or non-positive -top value would produce empty
	# reports; use the default instead.
	top = DEFAULT_TOP if top <= 0
	format = option.call("-format", "text")
	rpts = [
		OverallStatsReport.new(log),
		QueriesByTypeReport.new(log),
		QueriesThatTookUpTheMostTimeReport.new(log, top),
		SlowestQueriesReport.new(log, top),
		MostFrequentQueriesReport.new(log, top)
	]
	# Any format other than "text" produces HTML.
	agg = format == "text" ? TextReportAggregator.new : HTMLReportAggregator.new
	puts agg.create(rpts)
end
