"""Scrape paginated job-listing search results and store them in MySQL.

For each result page the script downloads the HTML, extracts four parallel
fields with regular expressions, and inserts one row per listing into the
``job51`` table (columns ``name``, ``position``, ``RMB``, ``LINK``).
"""
import re

import pymysql
import requests

# NOTE(review): the scheme/host part ("**") looks like a redacted placeholder;
# fill in the real site address before running. The long percent-encoded
# segment is the double-URL-encoded search keyword (it decodes to the Chinese
# for "data analysis"). "{}" receives the page number.
# The original text contained "cotype=99°reefrom=99": "&deg" had been rendered
# as the degree sign. It is restored to "&degreefrom=99" here.
LIST_URL_TEMPLATE = (
    "**/list/070200,000000,0000,00,9,99,"
    "%25E6%2595%25B0%25E6%258D%25AE%25E5%2588%2586%25E6%259E%2590,2,{}.html"
    "?lang=c&postchannel=0000&workyear=99&cotype=99&degreefrom=99"
    "&jobterm=99&companysize=99&ord_field=0&dibiaoid=0&line=&welfare="
)

HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) '
                  'AppleWebKit/537.36 (KHTML, like Gecko) '
                  'Chrome/83.0.4103.116 Safari/537.36'
}

# Result pages to crawl, inclusive (the original loop was range(1, 21)).
FIRST_PAGE = 1
LAST_PAGE = 20

# Seconds to wait for the server before giving up on a page.
REQUEST_TIMEOUT = 10

# One pattern per database column; each captures a single group.
NAME_RE = re.compile(r' <span class="t2"><a target="_blank" title="(.*?)" href="')
POSITION_RE = re.compile(r'<a target="_blank" title="(.*?)" href=".*?" on')
SALARY_RE = re.compile(r'<span class="t4">(.*?)</span>')
LINK_RE = re.compile(r'a target="_blank" title=".*?" href="(.*?)" on')

# Placeholders let the driver escape the scraped text, so quotes inside a
# title can neither break the statement nor inject SQL.
INSERT_SQL = "INSERT INTO job51 (name,position,RMB,LINK) VALUES (%s,%s,%s,%s)"


def _fetch_page(page):
    """Download result page number *page* and return its HTML text.

    Raises:
        requests.RequestException: on connection problems, timeouts, or a
            non-success HTTP status.
    """
    response = requests.get(
        url=LIST_URL_TEMPLATE.format(page),
        headers=HEADERS,
        timeout=REQUEST_TIMEOUT,
    )
    response.raise_for_status()
    # The pages are decoded as GB2312, as in the original script.
    response.encoding = 'gb2312'
    return response.text


def _parse_listing(html):
    """Extract ``(name, position, salary, link)`` tuples from page HTML.

    Returns a list of 4-tuples of strings; the list is empty when nothing
    matches.
    """
    names = NAME_RE.findall(html)
    positions = POSITION_RE.findall(html)
    # The first "t4" match is dropped, as the original did with `del z1[0]`
    # (presumably the table header cell - confirm against the page markup).
    # Slicing, unlike `del`, does not raise when there are no matches.
    salaries = SALARY_RE.findall(html)[1:]
    links = LINK_RE.findall(html)

    counts = (len(names), len(positions), len(salaries), len(links))
    if len(set(counts)) > 1:
        # zip() below stops at the shortest list; report the mismatch so
        # dropped or misaligned rows do not go unnoticed.
        print('warning: field counts differ (name, position, salary, link):',
              counts)

    return list(zip(names, positions, salaries, links))


def main():
    """Crawl every configured page and insert its listings into MySQL."""
    # A single connection serves the whole run instead of one per row.
    db = pymysql.connect(
        host='。。',
        port=3306,
        user='root',
        passwd='。。',
        database='test',
        charset='utf8'
    )
    try:
        with db.cursor() as cursor:
            for page in range(FIRST_PAGE, LAST_PAGE + 1):
                rows = _parse_listing(_fetch_page(page))
                if not rows:
                    continue
                cursor.executemany(INSERT_SQL, rows)
                # Commit per page so earlier pages survive a later failure.
                db.commit()
    finally:
        db.close()


if __name__ == "__main__":
    main()
爬虫练习-某站数据爬取代码并存入数据库(简易)