|
本帖最后由 GodelEscherBach 于 2014-5-31 14:58 编辑
[ol]# encoding: UTF-8 require 'net/http'require 'nokogiri'require 'colorize'require 'fileutils'require 'tempfile'require 'uri'require 'json' # RUNTIME# ==============# $ ruby -v# ruby 2.0.0p247 (2013-06-27 revision 41674) [x86_64-darwin12.4.0]#def fetch_siten = 1puts "index of page #{n} .".colorize(:yellow)site = "http://m.youjizz.com"until (posts_url = fetch_page(site,n)).nil?posts_url.each do |post_url|post_title = get_title_from post_urlif File.exist?("#{post_title}/manifest.json")puts "already exist #{post_title}".colorize(:red)nextend#FileUtils.rm_rf("#{post_title}")puts "fetch of #{post_title}".colorize(:green) will_download_images = []will_download_videos = [] downloaded_images = []downloaded_videos = [] post_images, post_videos = fetch_images_and_videos post_urlputs "discover #{post_images.count} images and #{post_videos.count} videos".colorize(:yellow) post_images.each_with_index do |image_url, index|image_filename = "#{post_title}_#{index + 1}#{get_ext_from image_url}"will_download_images image_filename, :url => image_url }end post_videos.each_with_index do |video_url, index|n = 1video_filename = "#{post_title}#{get_ext_from video_url}"while (will_download_videos.find_all{ |v| v[:filename] == video_filename }.count > 0)video_filename = "#{post_title}_x#{n}#{get_ext_from video_url}"endwill_download_videos video_filename, :url => video_url }end puts "create album dir of #{post_title}".colorize(:yellow)FileUtils.mkdir_p("#{post_title}/preview")will_download_images.each_with_index do |image, index|puts "downloading image #{image[:filename]}".colorize(:yellow)image_path = "#{post_title}/preview/#{image[:filename]}"download_file(image[:url], image_path) unless File.exist? image_pathdownloaded_images 0puts "download #{post_title} with #{failed_files.count}/#{files.count} failed ".colorize(:red)nextendputs "done , write to manifest.json".colorize(:yellow)manifest = {:title => post_title ,:url => post_url ,:images => will_download_images,:videos => will_download_videos}File.open("#{post_title}/manifest.json","w") do |f|f.write(manifest.to_json)end endputs "waiting for 3s to continue .".colorize(:blue)sleep 30n = n + 1endend def get_ext_from(res_url)File.extname(res_url).split('?')[0]end def fetch_page(site,num)posts_url = []page_url = "#{site}/page#{num}.html"uri = URI.parse(page_url)Net::HTTP.start(uri.host,uri.port) do |http|resp = http.get(uri.path)#parse xmldoc = Nokogiri::HTML(resp.body)posts = doc.css('.row .preview[href^=http]')posts.each do |post|post_url = post.attr('href')#now loading ...puts post_urlposts_url 复制代码 |
|