# Ajax scanning script
# Author: Shreeraj Shah (shreeraj@blueinfy.com)

require 'open-uri'
require 'rexml/document'
require 'uri'
include REXML

# Exactly one command-line argument (the target URL) is required;
# otherwise print the usage text and stop with exit status 1.
unless ARGV.length == 1
  usage_lines = [
    "\nUsage:\n",
    "urlgrep.rb <Target URL>\n",
    "Example:\n",
    "urlgrep.rb http://digg.com\n"
  ]
  usage_lines.each { |text| puts text }
  exit(1)
end

# Grabbing the target
url = ARGV[0]
begin
  target_uri = URI.parse(url)
rescue URI::InvalidURIError => e
  abort "Invalid target URL: #{e.message}"
end
# Only URI classes that open-uri extends (such as http/https) expose a public
# #open. Anything else is rejected here rather than being handed to
# Kernel#open, which treats its argument as a local path or a "|command" pipe
# and, since Ruby 3.0, no longer fetches URLs at all.
abort "Unsupported target URL: #{url}" unless target_uri.respond_to?(:open)
# html is kept (not just its body) because its base_uri is read further down.
html = target_uri.open
page = html.read

# Path manipulation - bases used to resolve script references.
#   abspath: scheme://host[:port] of the fetched page, for references
#            that start with "/"
#   relpath: abspath plus the directory of the fetched page, without a
#            trailing "/", for references relative to the page

b_uri = html.base_uri
abspath = "#{b_uri.scheme}://#{b_uri.host}"
# Keep an explicit port only when it is not the scheme's default one.
abspath += ":#{b_uri.port}" unless b_uri.port == b_uri.default_port
# Drop the last path segment (the document name) to get the page directory;
# "/a/b/index.html" -> "/a/b", "/" or "" -> "".
relpath = abspath + b_uri.path.to_s.sub(%r{/[^/]*\z}, "")

# Accumulators shared by the scanning, fetching and reporting steps
all_path = ""                        # script src values, each followed by "|"
scriptname, scriptcontent = [], []   # parallel lists: script location / script text
sn = 0                               # script counter

# Scanning for script in the target
puts "\n---Scanning for scripts---"
# Opening <script ...> tags, matched case-insensitively. [^>]* also spans
# newlines, so tags whose attributes wrap across lines are found.
script_tag = /<script\b[^>]*>/i
# Value of the src attribute: double-quoted, single-quoted or bare. The
# lookbehind keeps attributes such as data-src from being taken for src.
src_attr = /(?<![\w-])src\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s"'>]+))/i
# The attribute is pulled out with a regexp instead of an XML parser because
# real-world HTML tags (valueless "async"/"defer", unquoted values) are not
# well-formed XML and would raise a parse error.
page.scan(script_tag) do |tag|
  found = src_attr.match(tag)
  next unless found

  # Exactly one of the three capture groups is set; decode entities such as
  # &amp; so the value is a usable URL.
  src = REXML::Text.unnormalize(found.captures.compact.first.to_s).strip
  next if src.empty?

  all_path += src + "|"
  puts src
end

# Collecting all src files
# Each collected src value is resolved to a full URL and downloaded; the URL
# goes to scriptname and the body to scriptcontent (same index in both).
puts "\n---Enumerating javascripts---"
all_path.split("|").each do |src|
  next if src.empty?

  begin
    script_url =
      if src.start_with?("//")
        # Protocol-relative reference: reuse the scheme of the scanned page.
        "#{b_uri.scheme}:#{src}"
      elsif URI.parse(src).absolute?
        src
      elsif src.start_with?("/")
        abspath + src
      else
        "#{relpath}/#{src}"
      end

    # Call #open on the URI object (provided by open-uri for http/https) so
    # the value never reaches Kernel#open; the block form closes the handle.
    script_body = URI.parse(script_url).open { |io| io.read }

    # Record only after a successful download so both lists stay aligned.
    scriptname.push(script_url)
    scriptcontent.push(script_body)
  rescue StandardError => e
    # One unreachable or malformed script reference should not end the scan.
    warn "Skipping #{src}: #{e.class}: #{e.message}"
  end
end


# Report, per downloaded script, every line that contains an HTTP(S) URL or a
# backslash-separated file path. Output format: "[<line number>]<line>".
# The patterns are built once instead of once per line.
# URLs may appear anywhere in a line (typically inside string literals), so
# the pattern is not anchored to the start of the line.
url_pattern = /https?\:\/\/[a-zA-Z0-9\-\.]+\.[a-zA-Z]{2,3}(\/\S*)?/i
path_pattern = /(\\[^\/:\*\?<>\|]+\.\w{2,6})|(\\{2}[^\/:\*\?<>\|]+\.\w{2,6}) /i

scriptname.zip(scriptcontent).each do |sname, content|
  puts sname
  content.split("\n").each_with_index do |line, index|
    # A line is printed once even when both patterns match it.
    next unless line =~ url_pattern || line =~ path_pattern

    puts "[#{index + 1}]#{line}"
  end
end
