Przeglądaj źródła

足球,篮球赛果抓取规则更新

Your Name 6 lat temu
rodzic
commit
5441b90e13
4 zmienionych plików z 383 dodań i 302 usunięć
  1. 126 197
      hg3535/.idea/workspace.xml
  2. 2 2
      hg3535/main.py
  3. 137 0
      hg3535/spiders/other_saiguo.py
  4. 118 103
      hg3535/spiders/saiguo.py

+ 126 - 197
hg3535/.idea/workspace.xml

@@ -2,10 +2,10 @@
 <project version="4">
   <component name="ChangeListManager">
     <list default="true" id="7fa42e97-ddea-4404-91db-5a9ccc162649" name="Default Changelist" comment="更改爬取url域名">
+      <change afterPath="$PROJECT_DIR$/spiders/other_saiguo.py" afterDir="false" />
       <change beforePath="$PROJECT_DIR$/.idea/workspace.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/workspace.xml" afterDir="false" />
       <change beforePath="$PROJECT_DIR$/main.py" beforeDir="false" afterPath="$PROJECT_DIR$/main.py" afterDir="false" />
-      <change beforePath="$PROJECT_DIR$/pipeline/roll_lanqiu.py" beforeDir="false" afterPath="$PROJECT_DIR$/pipeline/roll_lanqiu.py" afterDir="false" />
-      <change beforePath="$PROJECT_DIR$/settings.py" beforeDir="false" afterPath="$PROJECT_DIR$/settings.py" afterDir="false" />
+      <change beforePath="$PROJECT_DIR$/spiders/saiguo.py" beforeDir="false" afterPath="$PROJECT_DIR$/spiders/saiguo.py" afterDir="false" />
     </list>
     <option name="EXCLUDED_CONVERTED_TO_IGNORED" value="true" />
     <option name="SHOW_DIALOG" value="false" />
@@ -15,7 +15,7 @@
   </component>
   <component name="CoverageDataManager">
     <SUITE FILE_PATH="coverage/hg3535$items.coverage" NAME="items Coverage Results" MODIFIED="1554291398794" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="true" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$" />
-    <SUITE FILE_PATH="coverage/hg3535$main.coverage" NAME="main Coverage Results" MODIFIED="1569036654530" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="true" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$" />
+    <SUITE FILE_PATH="coverage/hg3535$main.coverage" NAME="main Coverage Results" MODIFIED="1569405209751" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="true" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$" />
     <SUITE FILE_PATH="coverage/hg3535$change.coverage" NAME="change Coverage Results" MODIFIED="1567749757856" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="true" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$" />
     <SUITE FILE_PATH="coverage/hg3535$hgjieshu.coverage" NAME="hgjieshu Coverage Results" MODIFIED="1558494720094" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="true" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/spiders" />
     <SUITE FILE_PATH="coverage/hg3535$halffull.coverage" NAME="halffull Coverage Results" MODIFIED="1554544334424" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="true" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/spiders" />
@@ -45,10 +45,10 @@
   <component name="FileEditorManager">
     <leaf SIDE_TABS_SIZE_LIMIT_KEY="300">
       <file pinned="false" current-in-tab="true">
-        <entry file="file://$PROJECT_DIR$/pipeline/roll_lanqiu.py">
+        <entry file="file://$PROJECT_DIR$/spiders/saiguo.py">
           <provider selected="true" editor-type-id="text-editor">
-            <state relative-caret-position="402">
-              <caret line="126" column="21" selection-start-line="126" selection-start-column="21" selection-end-line="126" selection-end-column="21" />
+            <state relative-caret-position="-693">
+              <caret line="84" column="18" lean-forward="true" selection-start-line="84" selection-start-column="18" selection-end-line="84" selection-end-column="18" />
               <folding>
                 <element signature="e#0#15#0" expanded="true" />
               </folding>
@@ -56,87 +56,6 @@
           </provider>
         </entry>
       </file>
-      <file pinned="false" current-in-tab="false">
-        <entry file="file://$PROJECT_DIR$/utils/helper.py">
-          <provider selected="true" editor-type-id="text-editor">
-            <state relative-caret-position="178">
-              <caret line="13" selection-start-line="13" selection-end-line="13" />
-              <folding>
-                <element signature="e#0#14#0" expanded="true" />
-              </folding>
-            </state>
-          </provider>
-        </entry>
-      </file>
-      <file pinned="false" current-in-tab="false">
-        <entry file="file://$PROJECT_DIR$/pipeline/zuqiu.py">
-          <provider selected="true" editor-type-id="text-editor">
-            <state relative-caret-position="2600">
-              <caret line="130" column="57" selection-start-line="130" selection-start-column="57" selection-end-line="130" selection-end-column="57" />
-              <folding>
-                <element signature="e#0#15#0" expanded="true" />
-              </folding>
-            </state>
-          </provider>
-        </entry>
-      </file>
-      <file pinned="false" current-in-tab="false">
-        <entry file="file://$PROJECT_DIR$/spiders/roll_zuqiu.py">
-          <provider selected="true" editor-type-id="text-editor">
-            <state relative-caret-position="880">
-              <caret line="44" column="45" selection-start-line="44" selection-start-column="45" selection-end-line="44" selection-end-column="45" />
-              <folding>
-                <element signature="e#38#53#0" expanded="true" />
-              </folding>
-            </state>
-          </provider>
-        </entry>
-      </file>
-      <file pinned="false" current-in-tab="false">
-        <entry file="file://$PROJECT_DIR$/main.py">
-          <provider selected="true" editor-type-id="text-editor">
-            <state relative-caret-position="300">
-              <caret line="15" column="43" selection-start-line="15" selection-start-column="43" selection-end-line="15" selection-end-column="43" />
-              <folding>
-                <element signature="e#0#10#0" expanded="true" />
-              </folding>
-            </state>
-          </provider>
-        </entry>
-      </file>
-      <file pinned="false" current-in-tab="false">
-        <entry file="file://$PROJECT_DIR$/spiders/zuqiu.py">
-          <provider selected="true" editor-type-id="text-editor">
-            <state relative-caret-position="480">
-              <caret line="24" column="95" selection-start-line="24" selection-start-column="95" selection-end-line="24" selection-end-column="95" />
-              <folding>
-                <element signature="e#24#35#0" expanded="true" />
-              </folding>
-            </state>
-          </provider>
-        </entry>
-      </file>
-      <file pinned="false" current-in-tab="false">
-        <entry file="file://$PROJECT_DIR$/spiders/roll_lanqiu.py">
-          <provider selected="true" editor-type-id="text-editor">
-            <state relative-caret-position="277">
-              <caret line="86" selection-start-line="86" selection-end-line="86" />
-              <folding>
-                <element signature="e#24#39#0" expanded="true" />
-              </folding>
-            </state>
-          </provider>
-        </entry>
-      </file>
-      <file pinned="false" current-in-tab="false">
-        <entry file="file://$PROJECT_DIR$/settings.py">
-          <provider selected="true" editor-type-id="text-editor">
-            <state relative-caret-position="257">
-              <caret line="147" column="58" lean-forward="true" selection-start-line="147" selection-start-column="58" selection-end-line="147" selection-end-column="58" />
-            </state>
-          </provider>
-        </entry>
-      </file>
     </leaf>
   </component>
   <component name="FileTemplateManagerImpl">
@@ -232,7 +151,6 @@
   <component name="IdeDocumentHistory">
     <option name="CHANGED_PATHS">
       <list>
-        <option value="$PROJECT_DIR$/log/zuqiu.log" />
         <option value="$PROJECT_DIR$/spiders/hg3535_wangqiu.py" />
         <option value="$PROJECT_DIR$/spiders/wqbodan.py" />
         <option value="$PROJECT_DIR$/spiders/hg3535_zuqiu.py" />
@@ -256,7 +174,6 @@
         <option value="$PROJECT_DIR$/.gitignore" />
         <option value="$PROJECT_DIR$/utils/helper.py" />
         <option value="$PROJECT_DIR$/pipeline/ball_func.py" />
-        <option value="$PROJECT_DIR$/spiders/saiguo.py" />
         <option value="$PROJECT_DIR$/items.py" />
         <option value="$PROJECT_DIR$/change.py" />
         <option value="$PROJECT_DIR$/pipeline/wangqiu.py" />
@@ -281,16 +198,18 @@
         <option value="$PROJECT_DIR$/spiders/roll_zuqiu.py" />
         <option value="$PROJECT_DIR$/spiders/zuqiu.py" />
         <option value="$PROJECT_DIR$/pipeline/roll_lanqiu.py" />
-        <option value="$PROJECT_DIR$/main.py" />
         <option value="$PROJECT_DIR$/settings.py" />
+        <option value="$PROJECT_DIR$/spiders/other_saiguo.py" />
+        <option value="$PROJECT_DIR$/main.py" />
+        <option value="$PROJECT_DIR$/spiders/saiguo.py" />
       </list>
     </option>
   </component>
   <component name="ProjectFrameBounds" extendedState="6">
-    <option name="x" value="663" />
-    <option name="y" value="31" />
-    <option name="width" value="1193" />
-    <option name="height" value="1014" />
+    <option name="x" value="-143" />
+    <option name="y" value="201" />
+    <option name="width" value="1940" />
+    <option name="height" value="1024" />
   </component>
   <component name="ProjectLevelVcsManager" settingsEditedManually="true" />
   <component name="ProjectView">
@@ -298,7 +217,6 @@
       <foldersAlwaysOnTop value="true" />
     </navigator>
     <panes>
-      <pane id="Scope" />
       <pane id="ProjectPane">
         <subPane>
           <expand>
@@ -325,6 +243,7 @@
           <select />
         </subPane>
       </pane>
+      <pane id="Scope" />
     </panes>
   </component>
   <component name="PropertiesComponent">
@@ -336,14 +255,14 @@
     <property name="settings.editor.selected.configurable" value="editor.preferences.fonts.default" />
   </component>
   <component name="RecentsManager">
-    <key name="MoveFile.RECENT_KEYS">
-      <recent name="C:\Users\admin\Desktop\sports_scrapy\hg3535" />
-      <recent name="C:\Users\admin\Desktop\sports_scrapy\hg3535\spiders" />
-    </key>
     <key name="CopyFile.RECENT_KEYS">
-      <recent name="C:\Users\admin\Desktop\sports_scrapy\hg3535\pipeline" />
       <recent name="C:\Users\admin\Desktop\sports_scrapy\hg3535\spiders" />
+      <recent name="C:\Users\admin\Desktop\sports_scrapy\hg3535\pipeline" />
+      <recent name="C:\Users\admin\Desktop\sports_scrapy\hg3535" />
+    </key>
+    <key name="MoveFile.RECENT_KEYS">
       <recent name="C:\Users\admin\Desktop\sports_scrapy\hg3535" />
+      <recent name="C:\Users\admin\Desktop\sports_scrapy\hg3535\spiders" />
     </key>
   </component>
   <component name="RunDashboard">
@@ -633,14 +552,8 @@
       <workItem from="1568249379425" duration="5234000" />
       <workItem from="1568604419513" duration="11095000" />
       <workItem from="1568970792599" duration="3014000" />
-      <workItem from="1569036019518" duration="526000" />
-    </task>
-    <task id="LOCAL-00042" summary="updata">
-      <created>1567497332885</created>
-      <option name="number" value="00042" />
-      <option name="presentableId" value="LOCAL-00042" />
-      <option name="project" value="LOCAL" />
-      <updated>1567497332885</updated>
+      <workItem from="1569036019518" duration="1144000" />
+      <workItem from="1569287788365" duration="45283000" />
     </task>
     <task id="LOCAL-00043" summary="updata">
       <created>1567507381447</created>
@@ -978,11 +891,18 @@
       <option name="project" value="LOCAL" />
       <updated>1568973359234</updated>
     </task>
-    <option name="localTasksCounter" value="91" />
+    <task id="LOCAL-00091" summary="更改爬取url域名">
+      <created>1569036726870</created>
+      <option name="number" value="00091" />
+      <option name="presentableId" value="LOCAL-00091" />
+      <option name="project" value="LOCAL" />
+      <updated>1569036726870</updated>
+    </task>
+    <option name="localTasksCounter" value="92" />
     <servers />
   </component>
   <component name="TimeTrackingManager">
-    <option name="totallyTimeSpent" value="1944998000" />
+    <option name="totallyTimeSpent" value="1990899000" />
   </component>
   <component name="TodoView">
     <todo-panel id="selected-file">
@@ -995,14 +915,13 @@
   </component>
   <component name="ToolWindowManager">
     <frame x="-8" y="-8" width="1936" height="1066" extended-state="6" />
-    <editor active="true" />
     <layout>
-      <window_info active="true" content_ui="combo" id="Project" order="0" sideWeight="0.71173847" visible="true" weight="0.16044776" />
+      <window_info active="true" content_ui="combo" id="Project" order="0" sideWeight="0.71173847" visible="true" weight="0.16204691" />
       <window_info id="Structure" order="1" sideWeight="0.2882615" side_tool="true" weight="0.1108742" />
       <window_info id="Favorites" order="2" sideWeight="0.5013405" side_tool="true" weight="0.108208954" />
       <window_info anchor="bottom" id="Message" order="0" />
       <window_info anchor="bottom" id="Find" order="1" weight="0.32832617" />
-      <window_info anchor="bottom" id="Debug" order="2" sideWeight="0.49573562" visible="true" weight="0.4388412" />
+      <window_info anchor="bottom" id="Debug" order="2" sideWeight="0.49573562" visible="true" weight="0.28004292" />
       <window_info anchor="bottom" id="Cvs" order="3" weight="0.25" />
       <window_info anchor="bottom" id="Inspection" order="4" weight="0.4" />
       <window_info anchor="bottom" id="TODO" order="5" weight="0.32832617" />
@@ -1012,7 +931,7 @@
       <window_info anchor="bottom" id="Event Log" order="9" sideWeight="0.5042644" side_tool="true" weight="0.2725322" />
       <window_info anchor="bottom" x="22" y="376" width="1876" height="298" id="Terminal" order="10" sideWeight="0.49307036" weight="0.15987125" />
       <window_info anchor="bottom" id="Python Console" order="11" sideWeight="0.49946696" weight="0.37017167" />
-      <window_info anchor="bottom" id="Run" order="12" sideWeight="0.87473345" weight="0.39592275" />
+      <window_info anchor="bottom" id="Run" order="12" sideWeight="0.87473345" weight="0.32403433" />
       <window_info anchor="right" id="Commander" internal_type="SLIDING" order="0" type="SLIDING" weight="0.4" />
       <window_info anchor="right" id="Ant Build" order="1" weight="0.25" />
       <window_info anchor="right" content_ui="combo" id="Hierarchy" order="2" weight="0.25" />
@@ -1138,26 +1057,6 @@
           <line>277</line>
           <option name="timeStamp" value="71" />
         </line-breakpoint>
-        <line-breakpoint enabled="true" suspend="THREAD" type="python-line">
-          <url>file://$PROJECT_DIR$/spiders/saiguo.py</url>
-          <line>86</line>
-          <option name="timeStamp" value="100" />
-        </line-breakpoint>
-        <line-breakpoint enabled="true" suspend="THREAD" type="python-line">
-          <url>file://$PROJECT_DIR$/spiders/saiguo.py</url>
-          <line>89</line>
-          <option name="timeStamp" value="102" />
-        </line-breakpoint>
-        <line-breakpoint enabled="true" suspend="THREAD" type="python-line">
-          <url>file://$PROJECT_DIR$/spiders/saiguo.py</url>
-          <line>84</line>
-          <option name="timeStamp" value="104" />
-        </line-breakpoint>
-        <line-breakpoint enabled="true" suspend="THREAD" type="python-line">
-          <url>file://$PROJECT_DIR$/spiders/saiguo.py</url>
-          <line>47</line>
-          <option name="timeStamp" value="119" />
-        </line-breakpoint>
         <line-breakpoint enabled="true" suspend="THREAD" type="python-line">
           <url>file://$PROJECT_DIR$/spiders/roll_lanqiu.py</url>
           <line>63</line>
@@ -1238,11 +1137,6 @@
           <line>86</line>
           <option name="timeStamp" value="271" />
         </line-breakpoint>
-        <line-breakpoint enabled="true" suspend="THREAD" type="python-line">
-          <url>file://$PROJECT_DIR$/spiders/roll_lanqiu.py</url>
-          <line>43</line>
-          <option name="timeStamp" value="272" />
-        </line-breakpoint>
         <line-breakpoint enabled="true" suspend="THREAD" type="python-line">
           <url>file://$PROJECT_DIR$/pipeline/guanjun.py</url>
           <line>122</line>
@@ -1263,6 +1157,41 @@
           <line>120</line>
           <option name="timeStamp" value="279" />
         </line-breakpoint>
+        <line-breakpoint enabled="true" suspend="THREAD" type="python-line">
+          <url>file://$PROJECT_DIR$/spiders/saiguo.py</url>
+          <line>42</line>
+          <option name="timeStamp" value="310" />
+        </line-breakpoint>
+        <line-breakpoint enabled="true" suspend="THREAD" type="python-line">
+          <url>file://$PROJECT_DIR$/spiders/saiguo.py</url>
+          <line>37</line>
+          <option name="timeStamp" value="312" />
+        </line-breakpoint>
+        <line-breakpoint enabled="true" suspend="THREAD" type="python-line">
+          <url>file://$PROJECT_DIR$/spiders/saiguo.py</url>
+          <line>31</line>
+          <option name="timeStamp" value="313" />
+        </line-breakpoint>
+        <line-breakpoint enabled="true" suspend="THREAD" type="python-line">
+          <url>file://$PROJECT_DIR$/spiders/saiguo.py</url>
+          <line>99</line>
+          <option name="timeStamp" value="317" />
+        </line-breakpoint>
+        <line-breakpoint enabled="true" suspend="THREAD" type="python-line">
+          <url>file://$PROJECT_DIR$/spiders/saiguo.py</url>
+          <line>111</line>
+          <option name="timeStamp" value="318" />
+        </line-breakpoint>
+        <line-breakpoint enabled="true" suspend="THREAD" type="python-line">
+          <url>file://$PROJECT_DIR$/spiders/saiguo.py</url>
+          <line>134</line>
+          <option name="timeStamp" value="319" />
+        </line-breakpoint>
+        <line-breakpoint enabled="true" suspend="THREAD" type="python-line">
+          <url>file://$PROJECT_DIR$/spiders/saiguo.py</url>
+          <line>85</line>
+          <option name="timeStamp" value="321" />
+        </line-breakpoint>
       </breakpoints>
       <default-breakpoints>
         <breakpoint type="python-exception">
@@ -1290,20 +1219,6 @@
     </expressions>
   </component>
   <component name="editorHistoryManager">
-    <entry file="file://$USER_HOME$/AppData/Local/Programs/Python/Python37/Lib/asyncio/tasks.py">
-      <provider selected="true" editor-type-id="text-editor">
-        <state relative-caret-position="145">
-          <caret line="349" selection-start-line="349" selection-end-line="349" />
-        </state>
-      </provider>
-    </entry>
-    <entry file="file://C:/venv/Lib/site-packages/twisted/internet/task.py">
-      <provider selected="true" editor-type-id="text-editor">
-        <state relative-caret-position="195">
-          <caret line="630" selection-start-line="630" selection-end-line="630" />
-        </state>
-      </provider>
-    </entry>
     <entry file="file://C:/venv/Lib/site-packages/pymongo/mongo_client.py">
       <provider selected="true" editor-type-id="text-editor">
         <state relative-caret-position="-16140">
@@ -1448,16 +1363,6 @@
         </state>
       </provider>
     </entry>
-    <entry file="file://$PROJECT_DIR$/spiders/saiguo.py">
-      <provider selected="true" editor-type-id="text-editor">
-        <state relative-caret-position="260">
-          <caret line="18" selection-start-line="18" selection-end-line="19" />
-          <folding>
-            <element signature="e#0#15#0" expanded="true" />
-          </folding>
-        </state>
-      </provider>
-    </entry>
     <entry file="file://$PROJECT_DIR$/spiders/liansai.py">
       <provider selected="true" editor-type-id="text-editor">
         <state relative-caret-position="400">
@@ -1508,13 +1413,6 @@
         </state>
       </provider>
     </entry>
-    <entry file="file://$PROJECT_DIR$/middlewares.py">
-      <provider selected="true" editor-type-id="text-editor">
-        <state relative-caret-position="38">
-          <caret line="137" selection-start-line="137" selection-end-line="137" />
-        </state>
-      </provider>
-    </entry>
     <entry file="file://$PROJECT_DIR$/pipeline/guanjun.py">
       <provider selected="true" editor-type-id="text-editor">
         <state relative-caret-position="520">
@@ -1621,19 +1519,26 @@
         </state>
       </provider>
     </entry>
-    <entry file="file://$PROJECT_DIR$/pipeline/zuqiu.py">
+    <entry file="file://$PROJECT_DIR$/spiders/zuqiu.py">
       <provider selected="true" editor-type-id="text-editor">
-        <state relative-caret-position="2600">
-          <caret line="130" column="57" selection-start-line="130" selection-start-column="57" selection-end-line="130" selection-end-column="57" />
+        <state relative-caret-position="260">
+          <caret line="24" column="95" selection-start-line="24" selection-start-column="95" selection-end-line="24" selection-end-column="95" />
           <folding>
-            <element signature="e#0#15#0" expanded="true" />
+            <element signature="e#24#35#0" expanded="true" />
           </folding>
         </state>
       </provider>
     </entry>
+    <entry file="file://$PROJECT_DIR$/settings.py">
+      <provider selected="true" editor-type-id="text-editor">
+        <state relative-caret-position="2960">
+          <caret line="148" column="56" selection-start-line="148" selection-start-column="56" selection-end-line="148" selection-end-column="56" />
+        </state>
+      </provider>
+    </entry>
     <entry file="file://$PROJECT_DIR$/spiders/roll_zuqiu.py">
       <provider selected="true" editor-type-id="text-editor">
-        <state relative-caret-position="880">
+        <state relative-caret-position="842">
           <caret line="44" column="45" selection-start-line="44" selection-start-column="45" selection-end-line="44" selection-end-column="45" />
           <folding>
             <element signature="e#38#53#0" expanded="true" />
@@ -1641,57 +1546,81 @@
         </state>
       </provider>
     </entry>
-    <entry file="file://$PROJECT_DIR$/spiders/zuqiu.py">
+    <entry file="file://$PROJECT_DIR$/utils/helper.py">
       <provider selected="true" editor-type-id="text-editor">
-        <state relative-caret-position="480">
-          <caret line="24" column="95" selection-start-line="24" selection-start-column="95" selection-end-line="24" selection-end-column="95" />
+        <state relative-caret-position="260">
+          <caret line="13" selection-start-line="13" selection-end-line="13" />
           <folding>
-            <element signature="e#24#35#0" expanded="true" />
+            <element signature="e#0#14#0" expanded="true" />
           </folding>
         </state>
       </provider>
     </entry>
-    <entry file="file://$PROJECT_DIR$/main.py">
+    <entry file="file://$PROJECT_DIR$/pipeline/roll_lanqiu.py">
       <provider selected="true" editor-type-id="text-editor">
-        <state relative-caret-position="300">
-          <caret line="15" column="43" selection-start-line="15" selection-start-column="43" selection-end-line="15" selection-end-column="43" />
+        <state relative-caret-position="60">
+          <caret line="12" selection-start-line="12" selection-end-line="12" />
           <folding>
-            <element signature="e#0#10#0" expanded="true" />
+            <element signature="e#0#15#0" expanded="true" />
           </folding>
         </state>
       </provider>
     </entry>
-    <entry file="file://$PROJECT_DIR$/spiders/roll_lanqiu.py">
+    <entry file="file://$PROJECT_DIR$/spiders/other_saiguo.py">
+      <provider selected="true" editor-type-id="text-editor">
+        <state relative-caret-position="460">
+          <caret line="40" column="34" lean-forward="true" selection-start-line="37" selection-end-line="42" selection-end-column="48" />
+        </state>
+      </provider>
+    </entry>
+    <entry file="file://$PROJECT_DIR$/middlewares.py">
+      <provider selected="true" editor-type-id="text-editor">
+        <state relative-caret-position="461">
+          <caret line="152" selection-start-line="152" selection-end-line="152" />
+        </state>
+      </provider>
+    </entry>
+    <entry file="file://C:/venv/Lib/site-packages/scrapy/http/response/text.py">
+      <provider selected="true" editor-type-id="text-editor">
+        <state relative-caret-position="145">
+          <caret line="118" selection-start-line="118" selection-end-line="118" />
+        </state>
+      </provider>
+    </entry>
+    <entry file="file://$PROJECT_DIR$/pipeline/zuqiu.py">
       <provider selected="true" editor-type-id="text-editor">
-        <state relative-caret-position="277">
-          <caret line="86" selection-start-line="86" selection-end-line="86" />
+        <state relative-caret-position="642">
+          <caret line="63" column="8" lean-forward="true" selection-start-line="63" selection-start-column="8" selection-end-line="63" selection-end-column="14" />
           <folding>
-            <element signature="e#24#39#0" expanded="true" />
+            <element signature="e#0#15#0" expanded="true" />
           </folding>
         </state>
       </provider>
     </entry>
-    <entry file="file://$PROJECT_DIR$/utils/helper.py">
+    <entry file="file://$PROJECT_DIR$/spiders/roll_lanqiu.py">
       <provider selected="true" editor-type-id="text-editor">
-        <state relative-caret-position="178">
-          <caret line="13" selection-start-line="13" selection-end-line="13" />
+        <state relative-caret-position="165">
+          <caret line="43" selection-start-line="43" selection-end-line="43" />
           <folding>
-            <element signature="e#0#14#0" expanded="true" />
+            <element signature="e#24#39#0" expanded="true" />
           </folding>
         </state>
       </provider>
     </entry>
-    <entry file="file://$PROJECT_DIR$/settings.py">
+    <entry file="file://$PROJECT_DIR$/main.py">
       <provider selected="true" editor-type-id="text-editor">
-        <state relative-caret-position="257">
-          <caret line="147" column="58" lean-forward="true" selection-start-line="147" selection-start-column="58" selection-end-line="147" selection-end-column="58" />
+        <state relative-caret-position="400">
+          <caret line="20" column="49" lean-forward="true" selection-start-line="20" selection-start-column="49" selection-end-line="20" selection-end-column="49" />
+          <folding>
+            <element signature="e#0#10#0" expanded="true" />
+          </folding>
         </state>
       </provider>
     </entry>
-    <entry file="file://$PROJECT_DIR$/pipeline/roll_lanqiu.py">
+    <entry file="file://$PROJECT_DIR$/spiders/saiguo.py">
       <provider selected="true" editor-type-id="text-editor">
-        <state relative-caret-position="402">
-          <caret line="126" column="21" selection-start-line="126" selection-start-column="21" selection-end-line="126" selection-end-column="21" />
+        <state relative-caret-position="-693">
+          <caret line="84" column="18" lean-forward="true" selection-start-line="84" selection-start-column="18" selection-end-line="84" selection-end-column="18" />
           <folding>
             <element signature="e#0#15#0" expanded="true" />
           </folding>

+ 2 - 2
hg3535/main.py

@@ -12,12 +12,12 @@ sys.path.append(os.path.dirname(os.path.abspath(__file__)))
 # execute(["scrapy", "crawl", "wqbodan"])
 # execute(["scrapy", "crawl", "bangqiu"])
 # execute(["scrapy", "crawl", "roll_zuqiu"]) # 滚球足球 回来要解开这个注释 其他全部解封
-execute(["scrapy", "crawl", "roll_lanqiu"]) #滚球篮球
+# execute(["scrapy", "crawl", "roll_lanqiu"]) #滚球篮球
 # execute(["scrapy", "crawl", "roll_wangqiu"]) #滚球网球
 # execute(["scrapy", "crawl", "roll_bangqiu"])  # 滚球棒球
 # execute(["scrapy", "crawl", "ball_status"]) #滚球id本地存
 # execute(["scrapy", "crawl", "ball_status_update"]) #滚球id结束时间更新状态
-# execute(["scrapy", "crawl", "saiguo"]) #滚球id结束时间更新状态
+execute(["scrapy", "crawl", "saiguo"]) #滚球id结束时间更新状态
 # execute(["scrapy", "crawl", "jieshu"]) #滚球id结束时间更新状态
 # execute(["scrapy", "crawl", "wq_jieshu"]) #滚球id结束时间更新状态
 # execute(["scrapy", "crawl", "bq_jieshu"]) #滚球id结束时间更新状态

+ 137 - 0
hg3535/spiders/other_saiguo.py

@@ -0,0 +1,137 @@
+import datetime
+import re
+
+import scrapy
+
+from ..items import Hgsaiguo
+
+
+class HgjieshuSpider(scrapy.Spider):
+    name = 'other_saiguo'
+    to_day = datetime.datetime.now()
+    allowed_domains = ['hg3535z.com']
+    custom_settings = {
+        "ITEM_PIPELINES": {
+            'hg3535.pipeline.saiguo.Jieshuqiupipeline': 300,
+        },
+        # 'LOG_LEVEL': 'DEBUG',
+        # 'LOG_FILE': "../hg3535/log/saiguo{}_{}_{}.log".format(to_day.year, to_day.month, to_day.day)
+    }
+
+    def start_requests(self):
+        for y in range(1, 5):
+            url = 'https://www.hg3535.cn/zh-cn/info-centre/sportsbook-info/results/{}/normal/1'.format(y)
+            yield scrapy.Request(url=url, callback=self.parse, dont_filter=True, meta={'pt': y})
+
+    def parse(self, response):
+        if response.status == 200:
+            pt = response.meta['pt']
+            if pt == 1:
+                tema_score = response.xpath('//div[@class="flex-wrap"]/../div[5]')
+                # 获得所有比赛id对象
+                tema_id = response.xpath('//div[@class="flex-wrap"]/../div[1]/@id')
+                # 所有比赛id列表
+                temaid_list = [i.extract().replace('e-', "") for i in tema_id]
+                temascore_list = []
+                for score in tema_score:
+                    # 正则匹配规则
+                    p1 = r"\d{1,3}-\d{1,3}"
+                    pattern1 = re.compile(p1)
+                    try:
+                        # 获取正则匹配结果
+                        c = pattern1.findall(score.extract())[0]
+                        temascore_list.append(c)
+                    except:
+                        c = ""
+                        temascore_list.append(c)
+                # 赛事id,赛事比元组列表
+                tema_tupe = {(temaid_list[i], temascore_list[i]) for i in range(len(temaid_list))}
+                for y in tema_tupe:
+                    if y[1]:
+                        item = Hgsaiguo()
+                        item['id_score'] = y
+                        item['pt'] = pt
+                        yield item
+            if pt == 2:
+                tema_score = response.xpath('//div[@class="flex-wrap"]/../div[5]')
+                # 获得所有比赛id对象
+                tema_id = response.xpath('//div[@class="flex-wrap"]/../div[1]/@id')
+                # 所有比赛id列表
+                temaid_list = [i.extract().replace('e-', "") for i in tema_id]
+                temascore_list = []
+                for score in tema_score:
+                    # 正则匹配规则
+                    p1 = r"\d{1,3}-\d{1,3}"
+                    pattern1 = re.compile(p1)
+                    try:
+                        # 获取正则匹配结果
+                        c = pattern1.findall(score.extract())[0]
+                        temascore_list.append(c)
+                    except:
+                        c = ""
+                        temascore_list.append(c)
+                # 赛事id,赛事比元组列表
+                tema_tupe = {(temaid_list[i], temascore_list[i]) for i in range(len(temaid_list))}
+                for y in tema_tupe:
+                    if y[1]:
+                        item = Hgsaiguo()
+                        item['id_score'] = y
+                        item['pt'] = pt
+                        yield item
+
+            if pt == 3:
+                # 获得所有比赛获胜人,判断赛事是否结束
+                # tema_score = response.xpath('//div[@class="flex-wrap"]/../div[4]/text()')
+                tema_score = response.xpath('//div[@class="flex-wrap"]/../div[4]/@title')
+                # 获得所有比赛id对象
+                tema_id = response.xpath('//div[@class="flex-wrap"]/../div[1]/@id')
+                # 所有比赛id列表
+                temaid_list = [i.extract().replace('e-', "") for i in tema_id]
+                temascore_list = []
+                for score in tema_score:
+                    # 正则匹配规则
+                    # p1 = r"\d{1,3}-\d{1,3}"
+                    # pattern1 = re.compile(p1)
+                    try:
+                        # 获取正则匹配结果
+                        c = score.extract()
+                        temascore_list.append(c)
+                    except:
+                        c = ""
+                        temascore_list.append(c)
+                # 赛事id,赛事比元组列表
+                tema_tupe = {(temaid_list[i], temascore_list[i]) for i in range(len(temaid_list))}
+                for y in tema_tupe:
+                    if y[1]:
+                        item = Hgsaiguo()
+                        item['id_score'] = y
+                        item['pt'] = pt
+                        yield item
+
+            if pt == 4:
+                tema_score = response.xpath('//div[@class="flex-wrap"]/../div[5]')
+                # 获得所有比赛id对象
+                tema_id = response.xpath('//div[@class="flex-wrap"]/../div[1]/@id')
+                # str.replace()
+                # 所有比赛id列表
+                temaid_list = [i.extract().replace('e-', "") for i in tema_id]
+                temascore_list = []
+                for score in tema_score:
+                    # 正则匹配规则
+                    p1 = r"\d{1,3}-\d{1,3}"
+                    pattern1 = re.compile(p1)
+                    try:
+                        # 获取正则匹配结果
+                        c = pattern1.findall(score.extract())[0]
+                        temascore_list.append(c)
+                    except:
+                        c = ""
+                        temascore_list.append(c)
+                # 赛事id,赛事比元组列表
+                tema_tupe = {(temaid_list[i], temascore_list[i]) for i in range(len(temaid_list))}
+                for y in tema_tupe:
+                    if y[1]:
+                        item = Hgsaiguo()
+                        item['id_score'] = y
+                        item['pt'] = pt
+                        yield item

+ 118 - 103
hg3535/spiders/saiguo.py

@@ -2,6 +2,7 @@ import datetime
 import re
 
 import scrapy
+from lxml import etree
 
 from ..items import Hgsaiguo
 
@@ -26,112 +27,126 @@ class HgjieshuSpider(scrapy.Spider):
     def parse(self, response):
         if response.status == 200:
             pt = response.meta['pt']
+            # Football results (pt == 1). NOTE(review): values parsed in this branch are only bound to locals; no item is yielded here.
             if pt == 1:
-                tema_score = response.xpath('//div[@class="flex-wrap"]/../div[5]')
-                # 获得所有比赛id对象
-                tema_id = response.xpath('//div[@class="flex-wrap"]/../div[1]/@id')
-                # 所有比赛id列表
-                temaid_list = [i.extract().replace('e-', "") for i in tema_id]
-                temascore_list = []
-                for score in tema_score:
-                    # 正则匹配规则
-                    p1 = r"\d{1,3}-\d{1,3}"
-                    pattern1 = re.compile(p1)
-                    try:
-                        # 获取正则匹配结果
-                        c = pattern1.findall(score.extract())[0]
-                        temascore_list.append(c)
-                    except:
-                        c = ""
-                        temascore_list.append(c)
-                # 赛事id,赛事比元组列表
-                tema_tupe = {(temaid_list[i], temascore_list[i]) for i in range(len(temaid_list))}
-                for y in tema_tupe:
-                    if y[1]:
-                        item = Hgsaiguo()
-                        item['id_score'] = y
-                        item['pt'] = pt
-                        yield item
+                league_ids = response.xpath('//div[@class="rt-l-bar football"]/@id').extract()
+                league_names = response.xpath('//div[@class="rt-l-bar football"]/span[@class="comp-txt"]/text()').extract()
+                for index in range(len(league_ids)):
+                    league_id = league_ids[index]
+                    league_name = league_names[index]
+                    response_data = response.xpath('//div[@id="dt-{}"]'.format(league_id)).extract_first()
+                    # response_data = response.xpath('//div[@id="dt-{}"]'.format('cmp-36254')).extract_first()
+                    data = etree.HTML(response_data)  # NOTE(review): extract_first() gives None when no dt-<id> div matches - confirm that cannot happen here
+                    # Team names (title attribute of each rt-event div)
+                    team_names = data.xpath('//div[@class="rt-event"]/@title')
+                    # Full-time score
+                    f_scores = data.xpath('.//div[contains(@class, "rt-ft ")]')
+                    # First-half score
+                    h_scores = data.xpath('.//div[contains(@class, "rt-ht ")]')
+                    # Time
+                    stimes = data.xpath('//div[@class="rt-event"]/../div[1]/span/text()')
+                    # Sub-play (sub-market) names
+                    # odd_names = data.xpath('//div[@class="rt-sub rt-data-hide"]/table/tbody[2]/tr/td[2]')
+                    # Sub-play (sub-market) results
+                    # odd_plays = data.xpath('//div[@class="rt-sub rt-data-hide"]/table/tbody[2]/tr/td[3]/span')
+                    match_ids = data.xpath('//div[@class="flex-wrap"]/../div[1]/@id')
+                    odd_datas = data.xpath('//div[contains(@class, "rt-sub ")]/table/tbody[2]')
+                    for y in range(len(odd_datas)):  # NOTE(review): assumes match_ids/team_names/h_scores/f_scores/stimes align one-to-one with odd_datas - confirm
+                        match_id = match_ids[y].replace('e-', '')
+                        league_id = league_id.replace('cmp-', '')
+                        team_name = team_names[y].replace(' ', '').split('-')
+                        # Sub-play (sub-market) names
+                        odd_names = odd_datas[y].xpath('.//tr/td[2]')
+                        # Sub-play (sub-market) results
+                        odd_plays = odd_datas[y].xpath('.//tr/td[3]/span')
+                        # Home team
+                        h_name = team_name[0]
+                        # Away team
+                        a_name = team_name[1]
+                        print(h_name, a_name)  # NOTE(review): looks like leftover debug output - confirm
+                        # First-half score
+                        h_score = h_scores[y].xpath('string(.)').replace(' ', '').replace('\r\n', '')
+                        # Full-time score
+                        f_score = f_scores[y].xpath('string(.)').replace(' ', '').replace('\r\n', '')
+                        # Regex rule for matching the time
+                        pattern = re.compile(r"\d{1,3}:\d{1,3}")
+                        stime = pattern.findall(stimes[y])[0]
+                        if odd_names:
+                            for i in range(len(odd_names)):
+                                name = odd_names[i].text
+                                plays = odd_plays[i].xpath('text()')
+                                if len(plays) == 2:
+                                    play = '{}&&{}'.format(plays[0], plays[1])
+                                else:
+                                    play = plays[0]
+                        else:
+                            pass
+
+            # Basketball results (pt == 2). NOTE(review): values parsed in this branch are only bound to locals; no item is yielded here.
             if pt == 2:
-                tema_score = response.xpath('//div[@class="flex-wrap"]/../div[5]')
-                # 获得所有比赛id对象
-                tema_id = response.xpath('//div[@class="flex-wrap"]/../div[1]/@id')
-                # 所有比赛id列表
-                temaid_list = [i.extract().replace('e-', "") for i in tema_id]
-                temascore_list = []
-                for score in tema_score:
-                    # 正则匹配规则
-                    p1 = r"\d{1,3}-\d{1,3}"
-                    pattern1 = re.compile(p1)
-                    try:
-                        # 获取正则匹配结果
-                        c = pattern1.findall(score.extract())[0]
-                        temascore_list.append(c)
-                    except:
-                        c = ""
-                        temascore_list.append(c)
-                # 赛事id,赛事比元组列表
-                tema_tupe = {(temaid_list[i], temascore_list[i]) for i in range(len(temaid_list))}
-                for y in tema_tupe:
-                    if y[1]:
-                        item = Hgsaiguo()
-                        item['id_score'] = y
-                        item['pt'] = pt
-                        yield item
+                league_ids = response.xpath('//div[@class="rt-l-bar sportHasQuater"]/@id').extract()
+                league_names = response.xpath('//div[@class="rt-l-bar sportHasQuater"]/span[@class="comp-txt"]/text()').extract()
+                for index in range(len(league_ids)):
+                    league_id = league_ids[index]
+                    league_name = league_names[index]
+                    response_data = response.xpath('//div[@id="dt-{}"]'.format(league_id)).extract_first()
+                    # response_data = response.xpath('//div[@id="dt-{}"]'.format('cmp-26405')).extract_first()
+                    data = etree.HTML(response_data)  # NOTE(review): extract_first() gives None when no dt-<id> div matches - confirm that cannot happen here
+                    # Team names (title attribute of each rt-event div)
+                    team_names = data.xpath('//div[@class="rt-event"]/@title')
+                    # Full-time score
+                    f_scores = data.xpath('.//div[@class="rt-qft"]')
+                    # First-half score
+                    h_scores = data.xpath('.//div[@class="rt-qt1"]')
+                    # Second-half score
+                    x_scores = data.xpath('.//div[@class="rt-qt2"]')
+                    # Time
+                    stimes = data.xpath('//div[@class="rt-event"]/../div[1]/span/text()')
+                    match_ids = data.xpath('//div[@class="flex-wrap"]/../div[1]/@id')
+                    odd_datas = data.xpath('//div[contains(@class, "rt-sub ")]/table/tbody[2]')
+                    for y in range(len(odd_datas)):  # NOTE(review): assumes the per-match lists above align one-to-one with odd_datas - confirm
+                        match_id = match_ids[y].replace('e-', '')
+                        league_id = league_id.replace('cmp-', '')
+                        team_name = team_names[y].replace(' ', '').split('-')
+                        # Sub-play (sub-market) rows
+                        child_data = odd_datas[y].xpath('./tr')
+                        # Home team
+                        h_name = team_name[0]
+                        # Away team
+                        a_name = team_name[1]
+                        print(h_name, a_name)  # NOTE(review): looks like leftover debug output - confirm
+                        # First-half score
+                        h_score = h_scores[y].xpath('string(.)').replace(' ', '').replace('\r\n', '')
+                        # Full-time score
+                        f_score = f_scores[y].xpath('string(.)').replace(' ', '').replace('\r\n', '')
+                        # Second-half score
+                        x_score = x_scores[y].xpath('string(.)').replace(' ', '').replace('\r\n', '')
+                        # Regex rule for matching the time
+                        pattern = re.compile(r"\d{1,3}:\d{1,3}")
+                        stime = pattern.findall(stimes[y])[0]
+                        if child_data:
+                            for i in range(len(child_data)):
+                                if i == 0:  # presumably the first row holds the per-quarter score table - confirm against the page markup
+                                    h_datas = child_data[i].xpath('.//td/table/tbody/tr[3]/td[@class="r-odds"]')
+                                    a_datas = child_data[i].xpath('.//td/table/tbody/tr[4]/td[@class="r-odds"]')
+                                    if h_datas and a_datas:
+                                        for x in range(len(h_datas)):
+                                            # Home team score for this quarter
+                                            h_data = h_datas[x].text.replace(' ', '').replace('\r\n', '')
+                                            # Away team score for this quarter
+                                            a_data = a_datas[x].text.replace(' ', '').replace('\r\n', '')
+                                else:
+                                    # Sub-play name
+                                    child_name = child_data[i].xpath('.//td[contains(@class, "r-bt ")]/text()')[0].replace(' ', '').replace('\r\n', '')
+                                    # Sub-play result
+                                    child_play = child_data[i].xpath('.//td[@class="r-odds"]/span[@class="prop"]/text()')[0]
+                        else:
+                            pass
 
+            # Tennis results (pt == 3); this branch is a no-op in this revision
             if pt == 3:
-                # 获得所有比赛获胜人,判断赛事是否结束
-                # tema_score = response.xpath('//div[@class="flex-wrap"]/../div[4]/text()')
-                tema_score = response.xpath('//div[@class="flex-wrap"]/../div[4]/@title')
-                # 获得所有比赛id对象
-                tema_id = response.xpath('//div[@class="flex-wrap"]/../div[1]/@id')
-                # 所有比赛id列表
-                temaid_list = [i.extract().replace('e-', "") for i in tema_id]
-                temascore_list = []
-                for score in tema_score:
-                    # 正则匹配规则
-                    # p1 = r"\d{1,3}-\d{1,3}"
-                    # pattern1 = re.compile(p1)
-                    try:
-                        # 获取正则匹配结果
-                        c = score.extract()
-                        temascore_list.append(c)
-                    except:
-                        c = ""
-                        temascore_list.append(c)
-                # 赛事id,赛事比元组列表
-                tema_tupe = {(temaid_list[i], temascore_list[i]) for i in range(len(temaid_list))}
-                for y in tema_tupe:
-                    if y[1]:
-                        item = Hgsaiguo()
-                        item['id_score'] = y
-                        item['pt'] = pt
-                        yield item
+                pass
 
+            # Baseball results (pt == 4); this branch is a no-op in this revision
             if pt == 4:
-                tema_score = response.xpath('//div[@class="flex-wrap"]/../div[5]')
-                # 获得所有比赛id对象
-                tema_id = response.xpath('//div[@class="flex-wrap"]/../div[1]/@id')
-                # str.replace()
-                # 所有比赛id列表
-                temaid_list = [i.extract().replace('e-', "") for i in tema_id]
-                temascore_list = []
-                for score in tema_score:
-                    # 正则匹配规则
-                    p1 = r"\d{1,3}-\d{1,3}"
-                    pattern1 = re.compile(p1)
-                    try:
-                        # 获取正则匹配结果
-                        c = pattern1.findall(score.extract())[0]
-                        temascore_list.append(c)
-                    except:
-                        c = ""
-                        temascore_list.append(c)
-                # 赛事id,赛事比元组列表
-                tema_tupe = {(temaid_list[i], temascore_list[i]) for i in range(len(temaid_list))}
-                for y in tema_tupe:
-                    if y[1]:
-                        item = Hgsaiguo()
-                        item['id_score'] = y
-                        item['pt'] = pt
-                        yield item
+                pass