Forráskód Böngészése

tmX crawler bugfixes

LH 4 éve
szülő
commit
a6f94941d3

+ 28 - 5
TM-ExchangeURLGrabber/Program.cs

@@ -13,9 +13,16 @@ namespace TM_ExchangeURLGrabber
         private static readonly HttpClient client = new HttpClient();
         private static readonly string magicXpath = "//input";
         private static readonly string magicXpath2 = "//select";
-        private static readonly string parameter = "_ctl3$PageTracks";
+        private static readonly Dictionary<String, String> parameters = new Dictionary<string, string>
+        {
+            {"united","_ctl3$PageTracks" },
+            {"tmnforever","_ctl3$PageTracks" },
+            {"nations","ctl03$PageTracks" },
+            {"sunrise","ctl03$PageTracks" },
+            {"original","ctl03$PageTracks" },
+        };
         private static readonly string template = "goto|{0}|20";
-        private static readonly int maxPages = Int32.MaxValue; //MAXINT on production run
+        private static readonly int maxPages = Int32.MaxValue; //Int32.MaxValue on production run
         private static string[] prefixes = { "united", "tmnforever", "nations", "sunrise", "original" };
 
 
@@ -25,14 +32,30 @@ namespace TM_ExchangeURLGrabber
             {
                 foreach (var prefix in prefixes)
                 {
-                    await Process(prefix);
+                    if (File.Exists("./" + prefix + ".txt"))
+                    {
+                        //skip if the file exists for now; in future an incremental handler, used for recovery on a botched run
+                        continue;
+                    }
+                    else
+                    {
+                        await Process(prefix);
+                    }
                 }
             }
             else
             {
                 foreach (var prefix in prefixes)
                 {
-                    await ProcessStage2(prefix);
+                    if (File.Exists("./" + prefix + "-stage2.txt"))
+                    {
+                        //skip if the file exists for now; in future an incremental handler, used for recovery on a botched run
+                        continue;
+                    }
+                    else
+                    {
+                        await ProcessStage2(prefix);
+                    }
                 }
             }
         }
@@ -188,7 +211,7 @@ namespace TM_ExchangeURLGrabber
                 Console.WriteLine(magic);
                 //postDict.Add(parameter, magic);
 
-                postDict.Add("__EVENTTARGET", parameter);
+                postDict.Add("__EVENTTARGET", parameters[prefix]);
                 postDict.Add("__EVENTARGUMENT", magic);
 
                 Console.WriteLine("############ BEGIN P0ST################");

+ 8 - 0
TM-ExchangeURLGrabber/Properties/launchSettings.json

@@ -0,0 +1,8 @@
+{
+  "profiles": {
+    "TM-ExchangeURLGrabber": {
+      "commandName": "Project",
+      "commandLineArgs": "stage1"
+    }
+  }
+}