require 'net/http'
require 'uri'
require 'set'
include Net

# Script configuration, consumed by the GetWiki.fetch call at the bottom of
# this file. Frozen so the shared strings cannot be mutated by accident.

# Project identifier; becomes a path segment of every wiki URL requested.
PROJECT = 'roller'.freeze
# Name of the first wiki page to download.
START_PAGE = 'Index'.freeze
# Directory (relative to the working directory) that receives the downloads.
OUT_DIR = '../manual'.freeze

# Downloads wiki pages, the pages they link to, and their attached images.
module GetWiki
  BASE_URL = 'http://dev-heaven.net'.freeze
  BASE_WIKI_URL = "#{BASE_URL}/wiki".freeze
  ATTACHMENTS_DIR = 'attachments'.freeze
  ATTACHMENT_EXTENSIONS = 'jpg|png|jpeg'.freeze

  EXPORT_HTML = '?export=html'.freeze

  # A double-quoted, site-absolute attachment path such as
  # "/attachments/12/some-image.png" (case-insensitive).
  # Capture 1 is the path without the leading slash, capture 2 the file name.
  ATTACHMENT_PATTERN =
    %r{"/(#{ATTACHMENTS_DIR}/\d+/([\w\-]+\.(?:#{ATTACHMENT_EXTENSIONS})))"}i

  # An anchor tagged exactly class="wiki-page" whose href is "<Name>.html",
  # optionally with a "#fragment" before the ".html". Capture 1 is the page name.
  WIKI_LINK_PATTERN = /<a href="(\w+)(?:#\w*)?\.html" class="wiki\-page"/

  # Fetch the wiki page, all those pages it links to in its own wiki, and all
  # attached images.
  #
  # project - project identifier used as a URL path segment.
  # page    - name of the wiki page to download.
  # out_dir - existing directory that receives "<page>.html" and the images.
  # ignore  - Set of page names that must not be fetched. It is mutated:
  #           every page downloaded during the crawl is added to it, which is
  #           what stops the recursion from revisiting pages.
  #
  # Returns nil.
  def self.fetch(project, page, out_dir, ignore = Set.new)
    page_url = "#{BASE_WIKI_URL}/#{project}/#{page}"

    # Get the full version so we can find attachments.
    full_html = Net::HTTP.get(URI.parse(page_url)).to_s
    ignore.add page
    save_attachments(full_html, out_dir)

    # Get the simple version for wiki links and actual download.
    export_html = Net::HTTP.get(URI.parse("#{page_url}#{EXPORT_HTML}")).to_s
    File.open("#{out_dir}/#{page}.html", 'w') { |file| file.puts export_html }

    export_html.scan(WIKI_LINK_PATTERN).flatten.each do |name|
      # Checked per link (not up front) because the recursive calls keep
      # adding pages to +ignore+ while this loop is running.
      fetch(project, name, out_dir, ignore) unless ignore.include? name
    end

    puts "Got #{page}.html"
  end

  # Download every attachment image referenced in +html+ into +out_dir+,
  # named after the last path segment. Each distinct attachment path is
  # requested once, however often the page references it.
  def self.save_attachments(html, out_dir)
    html.scan(ATTACHMENT_PATTERN).uniq.each do |path, image_name|
      image = Net::HTTP.get(URI.parse("#{BASE_URL}/#{path}"))
      File.open("#{out_dir}/#{image_name}", 'wb') { |file| file.write(image) }
      puts "Got #{image_name}"
    end
  end
  private_class_method :save_attachments
end

# Mirror the wiki starting at START_PAGE. 'Home' is pre-seeded in the ignore
# set so links to that page are never followed. Set.new requires an
# enumerable, so the name is wrapped in an array; a bare String raises
# ArgumentError on Ruby 1.9 and later.
GetWiki.fetch PROJECT, START_PAGE, OUT_DIR, Set.new(['Home'])