1 require 'open-uri'
2 require 'iconv'
3 require 'ftools'
4
# Category index page from which the volume links are extracted.
url = 'http://anime.xunlei.com/Book/category/589'
# Base URL prepended to every relative image path found on a volume page.
targetPrefix = 'http://images.mh.xunlei.com/origin/'

# File names used per volume: the helper script copied into each volume
# directory, and the list file the image URLs are written to.
replaceToolFileName, urlFileName = 'replace.rb', 'urls.lst'
10
# Read-only value holder for one volume: its name, page count and URL.
class Volumn
  attr_reader :name
  attr_reader :pageCount
  attr_reader :url

  # Stores the three arguments exactly as given; nothing is converted or
  # validated.
  def initialize(name, pageCount, url)
    @name, @pageCount, @url = name, pageCount, url
  end
end
19
20
21
# Fetch the category index page. The block form releases the handle opened by
# open-uri as soon as the body has been read.
volumnContent = open(url) { |page| page.read }

# The page <title> becomes the name of the top-level output directory.
target_re = /<title>(.*)<\/title>/
m = target_re.match(volumnContent)
# Fail with a clear message instead of a NoMethodError on nil below.
raise "No <title> element found in #{url}" if m.nil?
# Iconv.conv takes (to, from, str): the UTF-8 title is re-encoded as GBK.
# NOTE(review): presumably GBK is what the local filesystem/console expects - confirm.
title = Iconv.conv("gbk","utf-8",m[1])
Dir.mkdir(title) if !File.directory?(title)

# Every volume link on the index page yields three captures:
#   1 = volume page URL, 2 = volume name, 3 = leading digits of the page count.
target_re = /<a href="(http:\/\/images.anime.xunlei.com\/book\/segment\/\d+\/\d+.html)"\D+>(\S+)<\/a>\((\d+)\S+\)/
vList = volumnContent.scan(target_re).map do |volumnUrl, volumnName, pageCount|
  volumn = Volumn.new(Iconv.conv("gbk", "utf-8", volumnName), pageCount, volumnUrl)
  puts volumn.name
  volumn
end

# Array#reverse returns a new array and leaves the receiver untouched, so the
# former bare `vList.reverse` had no effect. Reverse in place so the volumes
# are processed in the opposite order to how they appear on the page.
vList.reverse!

vList.each do |v|
  # Create the volume directory underneath the title directory.
  puts "Creating volumn directory for [#{v.name}]..."
  puts
  volumnDir = "#{title}/#{v.name}"
  Dir.mkdir(volumnDir) if !File.exist?(volumnDir)

  # Copy the replace tool next to the URL list.
  puts "Copying replace tool to #{volumnDir}"
  puts
  File.copy(replaceToolFileName, "#{volumnDir}/#{replaceToolFileName}")

  # Fetch the volume page; image paths are assigned to images_arr[N] there.
  puts "Retriving image urls from page for volumn [#{v.name}]"
  puts
  volumnPageHtml = open(v.url) { |page| page.read }
  target_re = /images_arr\[(\d+)\] = \'(\w+\/\w+\.[a-zA-Z]{3,4})\'/

  # Write one absolute image URL per line. The block form of File.open closes
  # the file even if writing raises part-way through.
  puts "Outputing image urls for volumn [#{v.name}]"
  File.open("#{volumnDir}/#{urlFileName}", 'w') do |file|
    volumnPageHtml.scan(target_re) do |_index, imgPath|
      file.puts "#{targetPrefix}#{imgPath}"
    end
  end
  puts
  puts
end
70
71
Thursday, 10 July 2008
Subscribe to:
Post Comments (Atom)
0 comments:
Post a Comment