#!/usr/local/bin/ruby -w

# Log file parsers
# Wraps one raw syslog line and decides whether it is a PostgreSQL entry.
#
# Everything up to and including the first "postgres" token, plus the
# space-delimited field that follows it, is discarded. What remains is a
# PostgreSQL entry when it starts with "[" and contains a closing "]".
# NOTE(review): the fixed offsets below presumably target lines shaped like
# "[3-1] LOG:  query: SELECT ..." -- confirm against real log samples.
class SyslogLine
	DASH_REGEX = Regexp.new("-")
	SPACE_REGEX = Regexp.new(" ")
	PG_REGEX = Regexp.new("postgres")
	attr_accessor :connection_id, :is_pg_log_entry

	# data:: one raw line of the syslog file.
	# Malformed lines never raise; they simply leave is_pg_log_entry false.
	def initialize(data)
		@is_pg_log_entry = false
		@right_bracket = nil
		@text = extract_text(data)
		return if @text.nil? || @text[0, 1] != "["
		@right_bracket = @text.index("]")
		# A "[" without a matching "]" cannot be parsed; treat it as a foreign line.
		return if @right_bracket.nil?
		@is_pg_log_entry = true
		bracketed = @text[1, @right_bracket - 1]
		# Only look for the dash inside the brackets (from the second character
		# on), so a dash in the message itself cannot corrupt the identifier.
		dash = bracketed.index(DASH_REGEX, 1)
		@connection_id = dash ? bracketed[0, dash] : bracketed
	end

	# True when the message after the bracketed prefix starts with "LOG: ".
	def is_new_query
		return false if @right_bracket.nil?
		@text[@right_bracket+2, 5] == "LOG: "
	end

	# Returns the message text with every literal "^I" removed.
	# New-query lines skip 15 characters after the bracket, other lines skip 2.
	# Returns "" when nothing is left (or the line is not a PostgreSQL entry).
	def parse_query_segment
		return "" if @right_bracket.nil?
		# Open-ended slice: the segment is no longer capped at 10000 characters.
		tmp = @text[(@right_bracket + (is_new_query ? 15 : 2))..-1]
		return "" if tmp == nil
		tmp.gsub(/\^I/, "").chomp
	end

	# The extracted text ("" when the line held no "postgres" token).
	def to_s
		@text.to_s
	end

	private

	# Returns the stripped text after the first "postgres" token and the field
	# following it, or nil when the line does not mention "postgres" at all.
	def extract_text(data)
		return nil unless data =~ PG_REGEX
		# Limit of 2: later occurrences of "postgres" (e.g. inside the query
		# text) must stay part of the remainder instead of cutting it short.
		remainder = data.split(PG_REGEX, 2)[1]
		fields = remainder.split(SPACE_REGEX)
		# fields[1..-1] is nil when nothing follows the token.
		(fields[1..-1] || []).join(" ").strip
	end
end

# Reads a syslog file and reassembles the queries found in its PostgreSQL
# entries. Lines are grouped by the identifier SyslogLine reports as
# connection_id; each "new query" line opens a fresh Query for that
# identifier and every other entry is appended to the query currently open.
class Syslog
	DEBUG=false
	attr_reader :time_to_parse, :queries

	# filename:: path of the syslog file to read.
	def initialize(filename)
		@filename = filename
		@queries = []
	end

	# Parses the file, appending every assembled Query to #queries and storing
	# the elapsed time in #time_to_parse (also the return value).
	def parse
		start = Time.new
		working = {}
		File.foreach(@filename) {|text|
			line = SyslogLine.new(text)
			next if !line.is_pg_log_entry
			puts line if DEBUG
			id = line.connection_id
			if line.is_new_query
				# if there's a query in that slot presently, close it out
				finished = working.delete(id)
				if finished
					@queries << finished
					puts "Closed out a query, so far there are " + @queries.size.to_s if DEBUG
				end
				# put the query in the working table
				working[id] = Query.new
				puts "Started a query" if DEBUG
			end
			current = working[id]
			# A continuation with no open query (e.g. the log starts in the
			# middle of a statement) is discarded instead of crashing on nil.
			next if current.nil?
			segment = line.parse_query_segment
			current.append(segment)
			puts "Added to a working query: " + segment if DEBUG
		}
		# Queries still open at end of file are complete as far as we can tell.
		working.each_value {|q| @queries << q }
		@time_to_parse = Time.new - start
	end

	# Normalizes every parsed query in place.
	def normalize
		@queries.each {|q| q.normalize }
	end
end

# Wraps one line of a native PostgreSQL log file.
#
# When the third whitespace-separated field is "LOG:" (and there are more
# than three fields), the first two fields are dropped and the rest is
# re-joined with single spaces; otherwise the line is kept untouched.
class PostgresLogLine
	attr_reader :text
	# Regexp literals are used on purpose: inside a double-quoted string "\s"
	# is a plain space, which made these patterns narrower than the
	# /LOG:\s*query: / pattern used for splitting.
	QUERY_PREAMBLE = /LOG:[\s]*query:/
	QUERY_START = /^LOG:[\s]*query:/
	# Separator between the log preamble and the query text itself.
	QUERY_MARKER = /LOG:\s*query: /

	# text:: one raw line of the log file.
	def initialize(text)
		fields = text.split(" ")
		if fields.size > 3 && fields[2] == "LOG:"
			text = fields[2..-1].join(" ") # snip the date
		end
		@text = text
	end

	# True when the line starts with the query preamble and some text follows it.
	def is_new_query
		QUERY_START.match(@text) != nil && @text.split(QUERY_MARKER).size > 1
	end

	# True when the line starts with a tab.
	def is_continuation
		/^\t/.match(@text) != nil
	end

	# For a new-query line: the stripped text after the first query marker.
	# For any other line: the text with all tabs removed and the trailing
	# newline chomped.
	def parse_query_segment
		if is_new_query
			# Limit of 2 keeps a marker that appears inside the query text
			# (e.g. in a string literal) from cutting the query short.
			return @text.split(QUERY_MARKER, 2)[1].strip
		end
		@text.gsub(/\t/, "").chomp
	end
end

# This is really a state machine: it tracks the query currently being assembled.
# Collects queries from a stream of "new query" / "continuation" events.
# @current is the query being assembled; @queries holds the finished ones.
class Accumulator
	def initialize
		@current = nil
		@queries = []
	end

	# Returns a new array with every query seen so far, including the one
	# still being assembled. Without that last element the final query of a
	# log was silently dropped, because nothing ever flushed @current.
	def queries
		return @queries.dup if @current.nil?
		@queries + [@current]
	end

	# txt:: first segment of a query that is just starting.
	def new_query_start(txt)
		# new query starting, so put the old one in the array
		@queries << @current if !@current.nil?
		@current = Query.new(txt)
	end

	# txt:: a further segment of the query currently being assembled.
	def query_continuation(txt)
		# i.e., discard a partially formed query at the top of a log file
		return if @current.nil?
		@current.append(txt)
	end
end

# Reads a native PostgreSQL log file and turns it into a list of queries by
# feeding each line to an Accumulator.
class PostgresLog
	DEBUG=false
	attr_reader :queries, :time_to_parse

	# filename:: path of the log file to read.
	def initialize(filename)
		@queries = []
		@filename = filename
	end

	# Parses the file line by line, then stores the accumulated queries in
	# #queries and the elapsed time in #time_to_parse (also the return value).
	def parse
		began_at = Time.new
		collector = Accumulator.new
		File.foreach(@filename) do |raw|
			puts raw if DEBUG
			entry = PostgresLogLine.new(raw)
			if entry.is_new_query
				collector.new_query_start(entry.parse_query_segment)
				next
			end
			# Lines that are neither a query start nor a continuation are ignored.
			collector.query_continuation(entry.parse_query_segment) if entry.is_continuation
		end
		@queries = collector.queries
		@time_to_parse = Time.new - began_at
	end

	# Normalizes every parsed query in place.
	def normalize
		@queries.each do |query|
			query.normalize
		end
	end
end

# Query model
# A single SQL statement assembled from one or more log segments.
# Two queries are equal (and hash alike) when their text is equal, so they
# can be de-duplicated with Array#uniq or counted as Hash keys.
class Query
	REMOVE_STUFF_BTWN_SINGLE_QUOTES_REGEXP = Regexp.new("'[^']*'")
	attr_reader :text

	# text:: initial query text. It is copied, so later #append / #normalize
	#        calls never modify the caller's string (and frozen strings work).
	def initialize(text="")
		@text = text.dup
	end

	# Appends txt to the query, separated by a single space.
	# Returns the updated text.
	def append(txt)
		@text << " " << txt
	end

	# Replaces every single-quoted literal with '', collapses runs of spaces
	# and strips surrounding whitespace, all in place. Returns self.
	def normalize
		@text.gsub!(REMOVE_STUFF_BTWN_SINGLE_QUOTES_REGEXP, "''")
		@text.squeeze!(" ")
		@text.strip!
		self
	end

	# Equality by text; anything that is not a Query is simply unequal
	# instead of raising NoMethodError.
	def == other
		other.is_a?(Query) && @text == other.text
	end

	def eql? other
		self == other
	end

	def hash
		@text.hash
	end

	def to_s
		@text
	end

	def is_select
		check(/^SELECT/i)
	end

	def is_delete
		check(/^DELETE/i)
	end

	def is_insert
		check(/^INSERT/i)
	end

	# True when regexp matches the text with surrounding whitespace removed.
	def check(regexp)
		regexp.match(@text.strip) != nil
	end
end

# Reports 
# Combines several reports into one plain-text document.
class TextReportAggregator
	# reports:: objects responding to #text.
	# Returns the reports' text output concatenated in the given order.
	def create(reports)
		reports.inject("") do |document, report|
			document << report.text
		end
	end
end

# Combines several reports into one complete HTML page.
class HTMLReportAggregator
	# reports:: objects responding to #html.
	# Returns the page as a String: document head with an inline style sheet,
	# a title/heading carrying the generation time, then each report's HTML
	# in the given order.
	def create(reports)
		# Read the clock once so the title and the heading always agree.
		generated_at = Time.now
		rpt = "<html><head>"
		# Append (not assign) the style sheet; assigning here used to discard
		# the opening <html><head> tags.
		# A "tr { background-color:#E1E8FD; }" rule was disabled in the
		# original style sheet and is intentionally left out.
		rpt << <<EOS
<style type="text/css">
body { background-color:white; }
h2 { text-align:center; }
h3 { color:blue }
p, td, th { font-family:Courier, Arial, Helvetica, sans-serif; font-size:14px; }
th { color:white; background-color:#7B8CBE; }
span.keyword { color:blue; }
</style>
EOS
		rpt << "<title>SQL Query Analysis (generated #{generated_at})</title></head><body>\n"
		rpt << "<h2>SQL Query Analysis (generated #{generated_at})</h2><br>\n"
		reports.each {|r|
			rpt << r.html
		}
		rpt << "</body></html>\n"
	end
end

# Summary figures for a parsed log: total queries, distinct queries and
# the time the parse took.
class OverallStatsReport
	# log:: object responding to #queries and #time_to_parse.
	def initialize(log)
		@log = log
	end

	# HTML fragment; the parse time is rounded to two decimal places.
	def html
		elapsed = (@log.time_to_parse * 100).round / 100.0
		fragments = [
			"<h3>Overall statistics</h3>\n",
			"#{@log.queries.size} queries\n",
			"<br>#{@log.queries.uniq.size} unique queries\n",
			"<br>Parsed in #{elapsed} seconds\n"
		]
		fragments.join
	end

	# One-line plain-text summary; the parse time is printed unrounded.
	def text
		total = @log.queries.size
		distinct = @log.queries.uniq.size
		"#{total} queries (#{distinct} unique) parsed in #{@log.time_to_parse} seconds\n"
	end
end

# Ranks queries by how often they occur and reports the most frequent ones.
class MostFrequentQueriesReport
	# Keywords highlighted by #colorize. Matching is case-sensitive, so the
	# lower-case variants are listed explicitly.
	KEYWORDS = ["SELECT","INSERT INTO","WHERE","VALUES","FROM","AND","ORDER BY","GROUP BY","LIMIT","OFFSET","DESC","ASC","AS","select","from","where"].freeze
	# One alternation with word boundaries: every keyword is wrapped exactly
	# once and never inside a longer word (e.g. "AS" inside "ASC").
	KEYWORD_REGEX = Regexp.new("\\b(?:" + KEYWORDS.join("|") + ")\\b")
	HTML_ESCAPES = { "&" => "&amp;", "<" => "&lt;", ">" => "&gt;" }.freeze

	# q::   queries to rank; they are used as Hash keys, so equal queries
	#       must be eql? and hash alike.
	# top:: maximum number of entries to report.
	def initialize(q,top=DEFAULT_TOP)
		@queries, @top = q, top
	end

	# HTML table with rank, execution count and highlighted query text.
	def html
		rpt = "<h3>Most frequent queries</h3>\n"
		rpt << "<table><tr><th>Rank</th><th>Times executed</th><th>Query text</th></tr>\n"
		top_entries.each_with_index {|(query, count), rank|
			rpt << "<tr><td>#{rank+1}</td><td>#{count}</td><td>#{colorize(query)}</td></tr>\n"
		}
		rpt << "</table>\n"
	end

	# Returns the query text as HTML: &, < and > are escaped and each keyword
	# is wrapped in <span class='keyword'>. Works on a copy, so the query
	# (which also serves as a Hash key) is left untouched and repeated calls
	# give the same result.
	def colorize(q)
		escaped = q.text.gsub(/[&<>]/) {|c| HTML_ESCAPES[c] }
		escaped.gsub(KEYWORD_REGEX) {|w| "<span class='keyword'>#{w}</span>" }
	end

	# Plain-text listing, one "<count> times: <query>" line per entry.
	def text
		rpt = "### " + @top.to_s + " most frequent queries\n"
		top_entries.each {|query, count|
			rpt << count.to_s + " times: " + query.to_s + "\n"
		}
		return rpt
	end

	# Returns [query, count] pairs sorted by descending count. Queries with
	# equal counts keep the order in which they first appeared, which makes
	# the report deterministic.
	def create_report
		counts = {}
		first_seen = {}
		@queries.each_with_index {|q, i|
			first_seen[q] = i unless counts.has_key?(q)
			counts[q] = (counts[q] || 0) + 1
		}
		counts.sort_by {|q, n| [-n, first_seen[q]] }
	end

	private

	# The first @top entries of the ranking (none when @top is negative).
	def top_entries
		create_report.first([@top, 0].max)
	end
end

# Breaks the queries down into SELECT / INSERT / DELETE counts and
# whole-number percentages of the total.
class QueriesByTypeReport
	# q:: queries responding to #is_select, #is_insert and #is_delete.
	def initialize(q)
		@queries = q
	end

	# HTML table with one row per query type.
	def html
		sel,ins,del=create_report
		rpt = "<h3>Queries by type</h3>\n"
		rpt << "<table><tr><th>Type</th><th>Count</th><th>Percentage</th></tr>\n"
		[["SELECT", sel], ["INSERT", ins], ["DELETE", del]].each {|label, count|
			rpt << "<tr>"
			rpt << "<td>#{label}</td><td>#{count}</td><td align=center>#{percentage(count)}</td>\n"
			rpt << "</tr>"
		}
		rpt << "</table>\n"
	end

	# Plain-text listing with one line per query type.
	def text
		sel,ins,del=create_report
		# Pad to the widest count so the percentage column lines up even when
		# SELECT is not the largest figure.
		width = [sel, ins, del].map {|count| count.to_s.size }.max + 1
		rpt = "### Queries by type\n"
		[["SELECTs", sel], ["INSERTs", ins], ["DELETEs", del]].each {|label, count|
			rpt << label + ": " + count.to_s.ljust(width) + " (" + percentage(count).to_s + "%)\n"
		}
		return rpt
	end

	# Returns [select_count, insert_count, delete_count].
	def create_report
		sel=ins=del=0
		@queries.each {|q|
			sel +=1 if q.is_select
			ins +=1 if q.is_insert
			del +=1 if q.is_delete
		}
		[sel, ins, del]
	end

	private

	# Share of count in the total number of queries, truncated to a whole
	# percent. Integer arithmetic avoids both the NaN raised for an empty
	# query list and float artefacts such as 29/100 coming out as 28.
	def percentage(count)
		total = @queries.size
		return 0 if total == 0
		count * 100 / total
	end
end

# Number of entries shown by MostFrequentQueriesReport unless -top is given.
DEFAULT_TOP=10

# Command-line entry point: parse the log named by -file and print the
# requested report to standard output.
if __FILE__ == $0
	usage = "Usage: " + $0 + " [-logtype syslog|pglog] [-top n] [-normalize] [-format text|html] -file some_log_file_name"
	# Returns the argument that follows flag, or nil when the flag is absent
	# or is the last argument on the command line.
	option = lambda {|flag|
		at = ARGV.index(flag)
		at.nil? ? nil : ARGV[at + 1]
	}
	filename = option.call("-file")
	# Exit with the usage message (no backtrace) when no log file was named;
	# a bare "-file" used to fail later with a TypeError inside File.foreach.
	abort usage if filename.nil?
	# Anything other than "-logtype pglog" is treated as a syslog file.
	log = option.call("-logtype") == "pglog" ? PostgresLog.new(filename) : Syslog.new(filename)
	log.parse
	log.normalize if ARGV.include?("-normalize")
	# A flag without a value falls back to its default.
	top = (option.call("-top") || DEFAULT_TOP).to_i
	format = option.call("-format") || "text"
	rpts = [OverallStatsReport.new(log), QueriesByTypeReport.new(log.queries), MostFrequentQueriesReport.new(log.queries, top)]
	# Any format other than "text" produces HTML.
	agg = format == "text" ? TextReportAggregator.new : HTMLReportAggregator.new
	puts agg.create(rpts)
end
