# crawl_db.rb

  require 'mechanize'

  # Crawls the item list pages under BASE_URL, groups every scraped item by
  # its category and prints Ruby source code to stdout. The generated code
  # first initialises one array per category (e.g. `@topscordes = []`) and then
  # appends one `<Klass>.new(...)` call per scraped item. The classes named in
  # the generated code (HairsCorde, TopsCorde, ...) are not defined here; they
  # are expected to exist wherever the generated code is evaluated.

  BASE_URL = 'http://pripara.jp/item/'

  # Category label as it appears in the first table cell of an item
  # => class name used in the generated code.
  CATEGORY_MAPS = {
    'アクセサリー' => :HairsCorde,
    'ヘアアクセ' => :HairsCorde,
    'トップス' => :TopsCorde,
    'ボトムス' => :BottomsCorde,
    'スカート' => :BottomsCorde,
    'シューズ' => :ShoesCorde,
    'ワンピース' => :OnePieceCorde
  }.freeze

  # Bucket for items whose category label is not listed in CATEGORY_MAPS.
  FALLBACK_CATEGORY = :OtherCorde

  # Number of table cells read per item: category, mood, brand, rarity, like, color.
  DETAIL_CELLS = 6

  # Page file names, relative to BASE_URL, visited in this order.
  PAGES = %w[
    time1dan.html
    kami6dan.html
    kami5dan.html
    kami4dan.html
    kami3dan.html
    kami2dan.html
    kami1dan.html
    dream_kami5dan.html
    dream_kami4dan.html
    dream_kami3dan.html
    dream_kami2dan.html
    dream_kami1dan.html
    2016promotion.html
    kami-charatomoticke.html
    dream_dreamparade.html
    dream2015_6th.html
    dream2015_5th.html
    dream2015_4th.html
    dream2015_3rd.html
    dream2015_2nd.html
    dream2015_1st.html
    dreamparade.html
    2015_6th.html
    2015_5th_01.html
    2015_5th.html
    dream201511_4th.html
    2015_4th.html
    2015_3rd.html
    2015_2nd.html
    2015_1st.html
    memorial.html
    2015limited_time.html
    2015_encore.html
    2015promotion.html
    2015_3rd_03.html
    2015_3rd_02.html
    2015_3rd_01.html
    2014_2nd_12.html
    2014_2nd_11.html
    2014_2nd.html
    2014_1st.html
    limited_time.html
    3rd_encore.html
    encore.html
    2015_encore_02.html
    promotion.html
    cyalume.html
  ].freeze

  # Wraps +text+ in single quotes, escaping backslashes and single quotes so
  # the result is a valid single-quoted Ruby string literal. Values that
  # contain neither character come out exactly as "'value'".
  def quote_literal(text)
    # Block form of gsub: the replacement is taken verbatim, so no
    # backslash-sequence interpretation gets in the way.
    escaped = text.to_s.gsub(/[\\']/) { |char| "\\#{char}" }
    "'#{escaped}'"
  end

  # Extracts the brand key from the brand cell's icon image by stripping the
  # '../img/item/icon_' prefix and '.jpg' suffix from its src attribute.
  # Returns '' when the cell holds no <img> or the image has no src.
  def brand_from(cell)
    icon = cell.at('img')
    return '' unless icon

    icon[:src].to_s.gsub('../img/item/icon_', '').gsub('.jpg', '')
  end

  # Builds the attribute hash for one item block.
  #
  # @param corde [Nokogiri::XML::Node] one 'div.itemDateBlock' element
  # @return [Hash, nil] the item attributes, or nil when the block has fewer
  #   than DETAIL_CELLS table cells (a warning is written to stderr)
  def parse_corde(corde)
    cells = corde.search('table td')
    if cells.size < DETAIL_CELLS
      warn "skipping item: found #{cells.size} table cells, need #{DETAIL_CELLS}"
      return nil
    end

    # The <h2> text contains the <span> text (the code name) plus the display
    # name; star markers are dropped from both and the code name is cut out
    # of the heading to leave the name.
    codename = corde.search('h2 span').text.delete('★')
    name = corde.search('h2').text.delete('★').gsub(codename, '')

    {
      codename: codename,
      name: name,
      category: cells[0].text,
      mood: cells[1].text,
      brand: brand_from(cells[2]),
      rarity: cells[3].text,
      like: cells[4].text,
      color: cells[5].text
    }
  end

  # Renders the generated Ruby source: one array initialisation per category,
  # followed by one append line per item, in bucket order.
  #
  # @param by_category [Hash{Symbol => Array<Hash>}] items grouped by class name
  # @return [String] the generated source, every line terminated by "\n"
  def generate_source(by_category)
    lines = by_category.keys.map { |klass| "@#{klass.to_s.downcase}s = []\n" }

    by_category.each do |klass, contents|
      target = "@#{klass.to_s.downcase}s"
      contents.each do |content|
        args = content
               .values_at(:codename, :name, :rarity, :like, :color, :brand)
               .map { |value| quote_literal(value) }
               .join(',')
        lines << "#{target} << #{klass}.new(#{args})\n"
      end
    end

    lines.join
  end

  by_category = {
    HairsCorde: [],
    TopsCorde: [],
    BottomsCorde: [],
    ShoesCorde: [],
    OnePieceCorde: [],
    OtherCorde: []
  }

  agent = Mechanize.new

  PAGES.each do |page_name|
    page = agent.get("#{BASE_URL}#{page_name}")

    page.search('div.categoryDetailList div.itemDateBlock').each do |corde|
      corde_data = parse_corde(corde)
      next unless corde_data

      bucket = CATEGORY_MAPS.fetch(corde_data[:category], FALLBACK_CATEGORY)
      by_category[bucket] << corde_data
    end

    # One-second pause after each page; presumably to throttle requests to the site.
    sleep 1
  end

  print generate_source(by_category)