LH 7 лет назад
Родитель
Commit
799f111d00
1 изменённый файл: 12 добавлений и 6 удалений
  1. 12 6
      QuickStuff/src/main/java/WallpapersHomeSpider/Main.java

+ 12 - 6
QuickStuff/src/main/java/WallpapersHomeSpider/Main.java

@@ -36,15 +36,17 @@ public class Main
     public static final String base = "https://wallpapershome.com";
     public static void main(String[] args) throws IOException
     {
+        System.setProperty("http.agent", "Mozilla/5.0 (Windows NT 10.0; WOW64; rv:55.0) Gecko/20100101 Firefox/55.0"); 
         String[] categories={"space","stock-images","travel","hi-tech","games","cars-bikes","abstract","architecture","military","movies","nature","download-wallpapers"};
         for(String category:categories)
         {
             new File("T:\\downloadertemp\\"+category+"\\").mkdirs();
             List<String> sweepedWPs = new ArrayList<>();
-            sweepedWPs.addAll(sweepPage("https://wallpapershome.com/"+category+"/"));
+            sweepedWPs.addAll(sweepPage("https://wallpapershome.com/"+category+"/",category));
             sweepedWPs.forEach(System.out::println);
             System.out.println(sweepedWPs.size());
             int maxwp = sweepedWPs.size();
+
             for(int i=0;i<maxwp;i++)
             {
                 String wp =  sweepedWPs.get(i);
@@ -53,20 +55,24 @@ public class Main
                 String wpn = tmp[tmp.length-1];
                 File target = new File("T:\\downloadertemp\\"+category+"\\"+wpn);
                 URL u = new URL(wp);
-                FileUtils.copyURLToFile(u, target);
+                if(!target.exists())
+                    FileUtils.copyURLToFile(u, target);
             }
         }
         //
     }
     
-    public static List<String> sweepPage(final String pageurl)throws IOException
+    public static List<String> sweepPage(final String pageurl,String category)throws IOException
     {
-        String pageregex="page=(\\d*)";
+        
+        System.out.println(pageurl);
+        String pageregex="\\?page=(\\d*)";
         String page = grabPage(pageurl);
         Pattern pagecounter = Pattern.compile(pageregex);
         List<Integer> allMatches = new ArrayList<Integer>();
         Matcher m = pagecounter.matcher(page);
         while (m.find()) {
+          System.out.println(m.group());
           allMatches.add(Integer.parseInt(m.group(1)));
         }
         //allMatches.forEach(System.out::println);
@@ -113,7 +119,7 @@ public class Main
     {
         //Original Resolution:</span> <a href="/images/wallpapers/sun-2000x1250-planet-hd-16032.jpg">2000x1250</a>
         System.out.println("Sweeping:"+pageurl);
-        String pageregex="Original Resolution:</span> <a href=\"(/images/wallpapers/.*?\\d*\\.jpe?g)\">";
+        String pageregex="Original Resolution:</span> <a href=\"(/images/wallpapers/.*?\\d*\\.(jpe?g|png))\">";
         String page = grabPage(pageurl);
         Pattern pagecounter = Pattern.compile(pageregex);
         List<String> allMatches = new ArrayList<String>();
@@ -147,7 +153,7 @@ public class Main
             in.close();
         }
     }
-
+    //System.out.println(all);
     return all;
 }