across/cet_query.py
2017-08-24 23:06:15 +08:00

110 lines
2.3 KiB
Python

#!/usr/bin/env python3
#coding=utf-8
def text(files):
    """Overwrite ``cet.txt`` with the string *files*.

    The file is opened relative to the current working directory in ``w+``
    mode, so it is created when missing and any previous content is
    discarded before writing.
    """
    with open('cet.txt', 'w+') as out:
        out.write(files)
def read_text():
    """Return the whole content of ``cet.txt`` as a single string.

    The file is looked up relative to the current working directory.
    """
    with open('cet.txt', 'r') as src:
        return src.read()
import requests,os
from time import sleep
from urllib.parse import quote
from bs4 import BeautifulSoup as bs
# Landing page of the lookup service; query URLs are built by appending to it.
URL = 'http://www.chsi.com.cn/cet/'

# Query-string template: the first slot fills ``zkzh``, the second ``xm``.
data = 'query?zkzh={}&xm={}'

# Browser-like request headers sent along with the score query.
H = {
    'Accept': (
        'text/html,application/xhtml+xml,application/xml;q=0.9,'
        'image/webp,image/apng,*/*;q=0.8'
    ),
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'zh-CN,zh;q=0.8',
    'Connection': 'keep-alive',
    'Host': 'www.chsi.com.cn',
    'Referer': 'http://www.chsi.com.cn/cet/',
    'Upgrade-Insecure-Requests': '0',
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/60.0.3112.101 Safari/537.36'
    ),
}
def query(id__, name, timeout=10):
    """Fetch the result page for one ``zkzh`` / ``xm`` pair.

    The landing page (``URL``) is requested first; the cookies it sets are
    then reused for the actual query request, which is sent with the
    browser-like headers in ``H``.

    Args:
        id__: Value substituted into the ``zkzh`` query parameter.
        name: Value for the ``xm`` query parameter; it is percent-encoded
            before being placed in the URL.
        timeout: Seconds to wait on each HTTP request before ``requests``
            raises a timeout error, instead of blocking indefinitely.

    Returns:
        The body of the query response as text, or ``False`` when the
        landing page did not answer with a success status.
    """
    # The context manager releases the session's pooled connections on exit.
    with requests.Session() as session:
        landing = session.get(URL, timeout=timeout)
        if not landing.ok:
            print(id__,name,'出错',sep='-->')
            return False

        url = URL + data.format(id__, quote(name))
        print(url)
        # Cookies received from the landing page live on the session and are
        # sent automatically, so they need not be passed explicitly.
        response = session.get(url, headers=H, timeout=timeout)
        return response.text
def check(html):
    """Tell whether *html* is a result page without an error banner.

    Args:
        html: Page markup as returned by ``query``. Falsy values (the
            ``False`` that ``query`` returns on failure, ``None`` or an
            empty string) are accepted and treated as "no result".

    Returns:
        ``False`` when *html* is falsy or contains one of the two known
        error ``div`` elements, ``True`` otherwise.
    """
    # Nothing to inspect; also keeps a failed query from reaching the parser.
    if not html:
        return False

    soup = bs(html, 'html.parser')
    if soup.find('div', {"class": "error alignC marginT20"}):
        return False
    if soup.find('div', {"class": "error alignC"}):
        # The printed message means "captcha required".
        print('要求验证码')
        return False
    return True
#html = read_text()
def parse(html):
    """Extract the text of the result table with all whitespace removed.

    The table is located by its ``border="0"`` and ``align="center"``
    attributes.

    Args:
        html: Markup of a result page.

    Returns:
        The table's text pieces concatenated without separators, or an
        empty string when the page contains no matching table.
    """
    soup = bs(html, 'html.parser')
    table = soup.find('table', {"border": "0", "align": "center"})
    # find() yields None when nothing matches; avoid an AttributeError.
    if table is None:
        return ''
    # split() with no arguments drops every run of whitespace.
    return ''.join(table.getText().split())
def append_file(string):
    """Append *string* to ``cet.txt`` as one line.

    The file is opened relative to the current working directory and is
    created when it does not exist yet.

    Args:
        string: Text to store; a line terminator is added after it.
    """
    with open('cet.txt', 'a') as out:
        # In text mode '\n' is translated to the platform line ending on
        # write; writing os.linesep here would double the carriage return
        # on Windows.
        out.write(string + '\n')
# Start/stop pair of ticket numbers; the disabled batch scan below unpacks it
# into range(), so the second value is exclusive.
number = 420550171103500,420550171103600

# NOTE: the two string literals below are disabled code kept for reference
# (a one-off test and a batch scan over names and ticket numbers). As bare
# string expressions they have no effect at runtime.
'''text = query(420550171103524,'贺深')
if check(text):
    print(parse(text))
else:
    print('没有')
exit(0)
### testing end
names = ['贺深','张旭','尘飞杨']'''
'''for xm in names:
    for zkzh in [ i for i in range(*number) ]:
        sleep(10)
        text = query(zkzh,xm)
        if check(text):
            print('result has been checked')
            result = parse(text)
            print(result)
            append_file(result)
        else:
            print('result has not been checked')
            print(zkzh,xm,sep='-->')'''

# Single live lookup. The page is bound to ``html`` rather than ``text`` so
# the module-level function ``text`` is not overwritten.
# NOTE(review): this runs on import as well; consider an
# ``if __name__ == "__main__":`` guard if the module is ever imported.
html = query('420550171103524','贺深')
# query() returns False when the landing page request fails; skip parsing then.
if html and check(html):
    print('result has been checked')
    result = parse(html)
    print(result)
    append_file(result)
else:
    print('result has not been checked')