hg3535_zq_status_up.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325
  1. # -*- coding: utf-8 -*-
  2. import jsonpath
  3. import scrapy
  4. from scrapy.http import Request
  5. import psycopg2
  6. import time
  7. from functools import wraps
  8. from contextlib import contextmanager
  9. import psycopg2.extras
  10. from scrapy.conf import settings
  11. import json
  12. from datetime import datetime
  13. from datetime import date
  14. import itertools
  15. import re
  16. from scrapy.xlib.pydispatch import dispatcher
  17. from scrapy import signals
  18. # 测试一个函数的运行时间,使用方式:在待测函数直接添加此修饰器
  19. def timethis(func):
  20. @wraps(func)
  21. def wrapper(*args, **kwargs):
  22. start = time.perf_counter()
  23. r = func(*args, **kwargs)
  24. end = time.perf_counter()
  25. print('\n============================================================')
  26. print('{}.{} : {}'.format(func.__module__, func.__name__, end - start))
  27. print('============================================================\n')
  28. return r
  29. return wrapper
  30. # 测试一段代码运行的时间,使用方式:上下文管理器with
  31. # with timeblock('block_name'):
  32. # your_code_block...
  33. @contextmanager
  34. def timeblock(label='Code'):
  35. start = time.perf_counter()
  36. try:
  37. yield
  38. finally:
  39. end = time.perf_counter()
  40. print('==============================================================')
  41. print('{} run time: {}'.format(label, end - start))
  42. print('==============================================================')
  43. class SqlConn():
  44. '''
  45. 连接数据库,以及进行一些操作的封装
  46. '''
  47. sql_name = ''
  48. database = ''
  49. user = ''
  50. password = ''
  51. port = 0
  52. host = ''
  53. # 创建连接、游标
  54. def __init__(self, *args, **kwargs):
  55. if kwargs.get("sql_name"):
  56. self.sql_name = kwargs.get("sql_name")
  57. if kwargs.get("database"):
  58. self.database = kwargs.get("database")
  59. if kwargs.get("user"):
  60. self.user = kwargs.get("user")
  61. if kwargs.get("password"):
  62. self.password = kwargs.get("password")
  63. if kwargs.get("port"):
  64. self.port = kwargs.get("port")
  65. if kwargs.get("host"):
  66. self.host = kwargs.get("host")
  67. if not (self.host and self.port and self.user and
  68. self.password and self.database):
  69. raise Warning("conn_error, missing some params!")
  70. sql_conn = {
  71. 'postgresql': psycopg2,
  72. }
  73. self.conn = sql_conn[self.sql_name].connect(host=self.host,
  74. port=self.port,
  75. user=self.user,
  76. password=self.password,
  77. database=self.database,
  78. )
  79. self.cursor = self.conn.cursor(cursor_factory=psycopg2.extras.DictCursor)
  80. # self.cursor = self.conn.cursor()
  81. if not self.cursor:
  82. raise Warning("conn_error!")
  83. # 测试连接
  84. def test_conn(self):
  85. if self.cursor:
  86. print("conn success!")
  87. else:
  88. print('conn error!')
  89. # 单条语句的并提交
  90. def execute(self, sql_code):
  91. self.cursor.execute(sql_code)
  92. self.conn.commit()
  93. # 单条语句的不提交
  94. def execute_no_conmmit(self, sql_code):
  95. self.cursor.execute(sql_code)
  96. # 构造多条语句,使用%s参数化,对于每个list都进行替代构造
  97. def excute_many(self, sql_base, param_list):
  98. self.cursor.executemany(sql_base, param_list)
  99. # 批量执行(待完善)
  100. def batch_execute(self, sql_code):
  101. pass
  102. # 获取数据
  103. def get_data(self, sql_code, count=0):
  104. self.cursor.execute(sql_code)
  105. if int(count):
  106. return self.cursor.fetchmany(count)
  107. else:
  108. return self.cursor.fetchall()
  109. # 更新数据
  110. def updata_data(self, sql_code):
  111. self.cursor.execute(sql_code)
  112. # 插入数据
  113. def insert_data(self, sql_code):
  114. self.cursor(sql_code)
  115. # 滚动游标
  116. def cursor_scroll(self, count, mode='relative'):
  117. self.cursor.scroll(count, mode=mode)
  118. # 提交
  119. def commit(self):
  120. self.conn.commit()
  121. # 回滚
  122. def rollback(self):
  123. self.conn.rollback()
  124. # 关闭连接
  125. def close_conn(self):
  126. self.cursor.close()
  127. self.conn.close()
  128. class ComplexEncoder(json.JSONEncoder):
  129. def default(self, obj):
  130. if isinstance(obj, datetime):
  131. return obj.strftime('%Y-%m-%d %H:%M:%S')
  132. elif isinstance(obj, date):
  133. return obj.strftime('%Y-%m-%d')
  134. else:
  135. return json.JSONEncoder.default(self, obj)
  136. class LanqiuSpider(scrapy.Spider):
  137. def __init__(self):
  138. super(LanqiuSpider).__init__()
  139. #信号量
  140. # dispatcher.connect(self.spider_opened, signals.spider_opened)
  141. dispatcher.connect(self.spider_closed, signals.spider_closed)
  142. # self.conn = SqlConn(sql_name='postgresql',host='127.0.0.1',port=5432,user='postgres',password='9998877',database='postgres')
  143. # database = PostgresqlDatabase('kaiyou',**{'host': '192.168.2.200', 'port': 10432, 'user': 'kaiyou', 'password': '123456'})
  144. # self.conn = SqlConn(sql_name='postgresql',host='192.168.2.200',port=10432,user='kaiyou',password='123456',database='kaiyou')
  145. self.conn = SqlConn(sql_name='postgresql',host=settings["POST_HOST"], port=settings['POST_PORT'], user=settings["POST_USER"],password=settings["POST_PASSWORD"],database=settings["POST_DATABASE"])
  146. name = "ball_status_update"
  147. allowed_domains = ['hg3535z.com']
  148. #sid要改为1 足球 现在测试改为4
  149. start_urls = ['https://hg3535z.com/odds2/d/getodds?sid=3&pt=4&ubt=am&pn=0&sb=2&dc=null&pid=0'] # 滚球菜单 篮球滚球列url
  150. # custom_settings = {
  151. # "ITEM_PIPELINES": {
  152. # 'scrapy_yzd.pipelines.BallStatuspipeline':1,
  153. # }
  154. # }
  155. # start_urls = ['http://hg3535z.com/odds2/d/getodds?sid=2&pt=3&ubt=am&pn=0&sb=2&dc=null&pid=0']
  156. # http: // hg3535z.com / odds2 / d / getamodds?eid = 3098030 & iip = false & ubt = am & isp = false
  157. # http://hg3535z.com/odds2/d/getodds?sid=2&pt=2&ubt=am&pn=0&sb=2&dc=null&pid=0
  158. def parse(self, response):
  159. # a = SqlConn(sql_name='postgresql',host='127.0.0.1',port=5432,user='postgres',password='9998877',database='postgres')
  160. # a.test_conn()
  161. b = self.conn.get_data("select match_id from st_ball_status where ball_type='足球'")
  162. d = list(itertools.chain(*b))
  163. # yield Request(url='http://hg3535z.com/odds2/d/getamodds?eid=3147927&iip=true&ubt=am&isp=false',callback=self.parse_each)
  164. for i in d:
  165. urls = 'https://hg3535z.com/odds2/d/getamodds?eid={}&iip=true&ubt=am&isp=false'.format(i)
  166. yield Request(url=urls,callback=self.parse_each)
  167. #取得url中的id字段
  168. def re_str(self,url_str):
  169. a = (re.findall(r"eid=(.+?)&",url_str))
  170. result = "".join(a)
  171. return result
  172. def parse_each(self,response):
  173. # print("URL: " + response.request.url)
  174. if response.text != "null":
  175. res = json.loads(response.text)
  176. # res = {"i":['false',1,3149430,4,2,"sh",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,"28 / 04","99:00","下半场",'false','true',"足球",0],"eg":{"c":{"k":27078,"n":"罗马尼亚乙组联赛"},"es":[{"dbg":'true',"egid":0,"g":"","i":["阿格斯","法乌尔","1","True","28 / 04","","1","False","0","0","0","1","FT","","","",0,"0","1",0,"False"],"ibs":'true',"ibsc":'true',"lcd":{"lcpid":5,"ilup":'false',"lcid":3147921,"p":0},"k":3147921,"o":{},"pci":{"ctid":0},"egn":""},{"dbg":'false',"egid":0,"g":"","i":["阿格斯-角球数","法乌尔-角球数","1","True","28 / 04","99:00","1","False","0","0","4","2","下半场","Second Half","","",0,"","",0,"False"],"ibs":'true',"ibsc":'false',"k":3149430,"o":{"oe":{"s":192,"v":["o3239030643","0.00","o3239030644","0.00"],"n":"角球:滚球 单 / 双"}},"pci":{"ctid":1,"ctn":"角球"},"egn":""}]},"ot":2,"sc":{"3147921":{"a":1,"h":0},"3149430":{"a":2,"h":4}},"v":567862}
  177. # res1 = jsonpath.jsonpath(res,'$..eg..es[0]..o')
  178. res1 = jsonpath.jsonpath(res,'$..eg..es[:]..o')
  179. if len(res1) > 1:
  180. print("这是有角球啊")
  181. o_dict0 = res1[0] # 递归取o字典
  182. o_dict1 = res1[1]
  183. re_url = response.request.url
  184. res_id = self.re_str(re_url)
  185. print("我是角球id是")
  186. print(res_id)
  187. if o_dict0 or o_dict1:
  188. print("这不是个空字典")
  189. print("我不做任何操作的啊")
  190. # re_url = response.request.url
  191. # res_id = self.re_str(re_url)
  192. # print(res_id)
  193. # utime = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
  194. # a = SqlConn(sql_name='postgresql',host='127.0.0.1',port=5432,user='postgres',password='9998877',database='postgres')
  195. # self.conn.updata_data("update st_ball_status set status=0, update_time='{0}' where match_id={1}".format(utime,res_id))
  196. # sql1 = "insert into comendnotice(status, game_code, match_id,done_time) values (%s,%s, %s, %s) on conflict(match_id) do update set done_time = %s"
  197. # self.conn.cursor.execute(sql1,(4,'zq',res_id,utime,utime))
  198. # cursor.execute(sql1, (match_id, create_time,staus,update_time, ball_type,update_time))
  199. # self.conn.commit()
  200. else:
  201. o_dict0 = res1[0]
  202. if not o_dict0:
  203. print("这是空字典我要改状态")
  204. re_url = response.request.url
  205. res_id = self.re_str(re_url)
  206. print(res_id)
  207. utime = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
  208. # a = SqlConn(sql_name='postgresql',host='127.0.0.1',port=5432,user='postgres',password='9998877',database='postgres')
  209. self.conn.updata_data("update st_ball_status set status=0, update_time='{0}' where match_id={1}".format(utime,res_id))
  210. # sql1 = "insert into comendnotice(status, game_code, match_id,done_time) values (%s,%s, %s, %s) on conflict(match_id) do update set done_time = %s"
  211. self.conn.updata_data("update st_zq_result set status=2 where match_id={}".format(res_id))
  212. self.conn.updata_data("update st_zq_result_record set status=2 where match_id={}".format(res_id))
  213. self.conn.updata_data("update st_zq_competition set status=2 where match_id={}".format(res_id))
  214. # self.conn.cursor.execute(sql1,(4,'zq',res_id,utime,utime))
  215. # cursor.execute(sql1, (match_id, create_time,staus,update_time, ball_type,update_time))
  216. self.conn.commit()
  217. # if o_dict:
  218. # print("有数据")
  219. # else:
  220. # print("没数据")
  221. if response.text == "null":
  222. print("暂时没有数据")
  223. re_url = response.request.url
  224. res_id = self.re_str(re_url)
  225. print(res_id)
  226. utime = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
  227. # a = SqlConn(sql_name='postgresql',host='127.0.0.1',port=5432,user='postgres',password='9998877',database='postgres')
  228. self.conn.updata_data("update st_ball_status set status=0, update_time='{0}' where match_id={1}".format(utime,res_id))
  229. sql1 = "insert into comendnotice(status, game_code, match_id,done_time) values (%s,%s, %s, %s) on conflict(match_id) do update set done_time = %s"
  230. self.conn.cursor.execute(sql1,(4,'zq',res_id,utime,utime))
  231. self.conn.updata_data("update st_zq_result set status=2 where match_id={}".format(res_id))
  232. self.conn.updata_data("update st_zq_result_record set status=2 where match_id={}".format(res_id))
  233. self.conn.updata_data("update st_zq_competition set status=2 where match_id={}".format(res_id))
  234. # cursor.execute(sql1, (match_id, create_time,staus,update_time, ball_type,update_time))
  235. self.conn.commit()
  236. # a.close_conn()
  237. # def spider_opened(self, spider):
  238. def spider_closed(self, spider):
  239. print("我要关闭了")
  240. self.conn.close_conn()
  241. # new_datas = json.loads(response.text).get('eg', "")
  242. # if new_datas:
  243. # new_data = new_datas.get("es", "")
  244. # result = new_data[0]
  245. # o_dic = result['o']
  246. # if response.text == "null" or not o_dic:
  247. # print("没有数据")
  248. # print(response.body)
  249. # if response.text == "null":
  250. # print("这个网页没有数据的")