版本2:
# -*- coding: utf-8 -*-
# Look up a person's name in the Baidu "dishonest judgment debtor list" query
# endpoint (the URL-encoded `query` parameter below), collect every returned
# record into module-level lists, de-duplicate them and write them to Excel.
#
# The script is written so that it runs under both Python 2 and Python 3.
import sys

if sys.version_info[0] == 2:
    # Python 2 only: make implicit str <-> unicode conversions use UTF-8.
    # `reload` is a builtin there; this branch is never executed on Python 3.
    reload(sys)  # noqa: F821
    sys.setdefaultencoding('utf-8')

import time

import requests

# Start of the timed interval. It is deliberately taken before pandas is
# imported, so the elapsed time printed at the end includes that import.
time1 = time.time()

import pandas as pd
import json

# Module-level accumulators: one list per exported column, kept index-aligned
# (entry i of every list belongs to the same record).
iname = []
icard = []
courtName = []
areaName = []
caseCode = []
duty = []
performance = []
disruptTypeName = []
publishDate = []

# JSON key of a result record -> the accumulator list that receives its value.
_FIELD_SINKS = (
    ('iname', iname),
    ('cardNum', icard),
    ('courtName', courtName),
    ('areaName', areaName),
    ('caseCode', caseCode),
    ('duty', duty),
    ('performance', performance),
    ('disruptTypeName', disruptTypeName),
    ('publishDate', publishDate),
)


def person_executed(name, max_pages=30, timeout=10):
    """Fetch the records listed for *name* and append them to the module lists.

    Pages of 10 records are requested one after another. Every record found
    under ``data[*].result[*]`` of the JSON response is printed and its nine
    fields are appended to the module-level accumulator lists.

    Args:
        name: Name to search for; inserted into the ``iname`` query parameter.
        max_pages: Maximum number of pages to request (default 30).
        timeout: Per-request timeout in seconds, passed to ``requests.get``.

    Returns:
        None. Results are delivered through the module-level lists.

    A page that cannot be fetched or parsed is reported on stderr and skipped;
    a record lacking one of the expected keys is reported and skipped on its
    own. Neither aborts the run.
    """
    for page in range(max_pages):
        url = (
            "https://sp0.baidu.com/8aQDcjqpAAV3otqbppnN2DJv/api.php?resource_id=6899"
            "&query=%E5%A4%B1%E4%BF%A1%E8%A2%AB%E6%89%A7%E8%A1%8C%E4%BA%BA%E5%90%8D%E5%8D%95"
            "&cardNum=&"
            "iname=" + str(name) +
            "&areaName="
            "&pn=" + str(page * 10) +  # pn is the record offset, 10 per page
            "&rn=10"
            "&ie=utf-8&oe=utf-8&format=json"
        )

        try:
            # Without a timeout a stalled connection would block forever.
            body = requests.get(url, timeout=timeout).content
            payload = json.loads(body)
            records = [
                record
                for group in payload['data']
                for record in group['result']
            ]
        except (requests.RequestException, ValueError, KeyError, TypeError) as err:
            # ValueError covers invalid JSON; KeyError/TypeError cover a
            # response that does not have the data -> result layout.
            sys.stderr.write("page %d skipped: %r\n" % (page, err))
            continue

        if not records:
            # A well-formed but empty page is treated as the end of the
            # listing, so the remaining pages are not requested.
            break

        for record in records:
            try:
                # Read all nine fields before touching any list so the
                # accumulators can never get out of step with each other.
                values = [record[key] for key, _ in _FIELD_SINKS]
            except (KeyError, TypeError) as err:
                sys.stderr.write(
                    "record on page %d skipped: %r\n" % (page, err)
                )
                continue

            # Same output as printing the nine fields separated by spaces.
            print(u' '.join(u'%s' % value for value in values))
            for (_, sink), value in zip(_FIELD_SINKS, values):
                sink.append(value)


if __name__ == '__main__':
    name = "郭**"
    person_executed(name)
    print(len(iname))

    # Assemble the collected columns into a data frame.
    detail_data = pd.DataFrame({
        "name": iname,
        "IDCard": icard,
        "courtName": courtName,
        "areaName": areaName,
        "caseCode": caseCode,
        "duty": duty,
        "performance": performance,
        "disruptTypeName": disruptTypeName,
        "publishDate": publishDate,
    })

    # Drop rows that are exact duplicates of an earlier row.
    detail_data1 = detail_data.drop_duplicates()

    # Write the result to Excel. The path is a raw string so the backslash is
    # taken literally. The former `encoding='gbk'` argument is omitted: pandas
    # documents it as unused and newer releases no longer accept it.
    detail_data1.to_excel(r"F:\iname_icard_query.xlsx", header=True, index=False)

    time2 = time.time()
    # NOTE(review): the two literals below look like they contain
    # transliteration artefacts ("(chóng)", "(jié)", "(shí)"); they are kept
    # unchanged because they are runtime output -- confirm the intended text.
    print(u'ok,爬蟲(chóng)結(jié)束!')
    print(u'總共耗時(shí):' + str(time2 - time1) + 's')
聲明:本網頁內容旨在傳播知識,若有侵權等問題請及時與本網聯系,我們將在第一時間刪除處理。TEL:177 7030 7066 E-MAIL:11247931@qq.com