博客
关于我
强烈建议你试试无所不能的chatGPT,快点击我
高德地图关键字搜索
阅读量:7254 次
发布时间:2019-06-29

本文共 9185 字,大约阅读时间需要 30 分钟。

hot3.png

#!/usr/bin/python
# -*- coding: UTF-8 -*-
"""Load AMap (高德地图) area data and kindergarten POIs into a MySQL database.

The module is a collection of one-off steps that share one database:

* ``saveProvince``   - insert the hard-coded province list into ``gd_area``.
* ``parseHtml``      - read a saved HTML page and insert the entries listed
  under each province into ``gd_area``.
* ``readJsonFile``   - read a saved district JSON tree and insert it into
  ``gd_districts``.
* ``overWork``       - for every ``gd_districts`` row with ``LEVEL=2``, page
  through the AMap place-text search and insert the results into ``gd_kids``.

Running the file as a script executes ``overWork()``.
"""
import contextlib
import json
import math
import re

try:
    from urllib.request import urlopen  # noqa: F401  (Python 3)
except ImportError:
    from urllib import urlopen  # noqa: F401  (Python 2)

import pymysql
import requests
from bs4 import BeautifulSoup

# NOTE(review): connection settings (including the password) are hard-coded
# exactly as in the original script; consider moving them to configuration.
_DB_CONFIG = {
    'host': '127.0.0.1',
    'port': 3306,
    'user': 'json',
    'passwd': '123456',
    'db': 'youxia',
    'charset': "utf8",
}

# Page size used to compute the number of result pages in overWork(); it has
# to stay equal to the ``offset=50`` query parameter baked into the URL there.
_PAGE_SIZE = 50

# POI fields that are replaced by '' when missing or falsy, in the column
# order expected by the insert statement in parsepois().
_POI_OPTIONAL_FIELDS = (
    'address',
    'tel',
    'website',
    'email',
    'pcode',
    'pname',
    'citycode',
    'cityname',
    'adcode',
    'adname',
)

provinceList = [
    {"adcode": "110000", "name": "北京市"},
    {"adcode": "120000", "name": "天津市"},
    {"adcode": "130000", "name": "河北省"},
    {"adcode": "140000", "name": "山西省"},
    {"adcode": "150000", "name": "内蒙古自治区"},
    {"adcode": "210000", "name": "辽宁省"},
    {"adcode": "220000", "name": "吉林省"},
    {"adcode": "230000", "name": "黑龙江省"},
    {"adcode": "310000", "name": "上海市"},
    {"adcode": "320000", "name": "江苏省"},
    {"adcode": "330000", "name": "浙江省"},
    {"adcode": "340000", "name": "安徽省"},
    {"adcode": "350000", "name": "福建省"},
    {"adcode": "360000", "name": "江西省"},
    {"adcode": "370000", "name": "山东省"},
    {"adcode": "410000", "name": "河南省"},
    {"adcode": "420000", "name": "湖北省"},
    {"adcode": "430000", "name": "湖南省"},
    {"adcode": "440000", "name": "广东省"},
    {"adcode": "450000", "name": "广西壮族自治区"},
    {"adcode": "460000", "name": "海南省"},
    {"adcode": "500000", "name": "重庆市"},
    {"adcode": "510000", "name": "四川省"},
    {"adcode": "520000", "name": "贵州省"},
    {"adcode": "530000", "name": "云南省"},
    {"adcode": "540000", "name": "西藏自治区"},
    {"adcode": "610000", "name": "陕西省"},
    {"adcode": "620000", "name": "甘肃省"},
    {"adcode": "630000", "name": "青海省"},
    {"adcode": "640000", "name": "宁夏回族自治区"},
    {"adcode": "650000", "name": "新疆维吾尔自治区"},
    {"adcode": "710000", "name": "台湾省"},
    {"adcode": "810000", "name": "香港特别行政区"},
    {"adcode": "820000", "name": "澳门特别行政区"},
]

detailUrl = "https://www.amap.com/detail/get/detail?id=%s"
schoolUrl = "https://www.amap.com/service/poiInfo?query_type=TQUERY&pagesize=20&pagenum=1&qii=true&cluster_state=5&need_utd=true&utd_sceneid=1000&div=PC1000&addr_poi_merge=true&is_classify=true&zoom=13&city={}&geoobj=109.703841%7C38.259262%7C109.868675%7C38.302393&keywords=%E5%B9%BC%E5%84%BF%E5%9B%AD"


@contextlib.contextmanager
def _db_cursor():
    """Yield ``(db, cursor)`` and close both on exit, even after an error."""
    db = pymysql.connect(**_DB_CONFIG)
    try:
        cursor = db.cursor()
        try:
            yield db, cursor
        finally:
            cursor.close()
    finally:
        db.close()


def _fetch_jsonp(url, headers):
    """GET ``url`` and return the JSON object wrapped in its JSONP callback."""
    text = requests.get(url, headers=headers).text
    # The payload is everything between the first '(' and the last ')'.
    return json.loads(text[text.index('(') + 1:text.rindex(')')])


def saveProvince():
    """Insert every entry of ``provinceList`` into ``gd_area(code, name)``."""
    sql = 'insert into gd_area(code,name) values(%s,%s)'
    with _db_cursor() as (db, cursor):
        for province in provinceList:
            cursor.execute(sql, (province['adcode'], province['name']))
        db.commit()


def connectMysql():
    """Connectivity check: print the MySQL server version."""
    sql_select = "select version()"
    with _db_cursor() as (_, cursor):
        cursor.execute(sql_select)
        data = cursor.fetchone()
        print("DB version is : %s" % data)


def queryMysql():
    """Print the third column of the first row returned from ``gd_area``."""
    sql = " select * from gd_area "
    with _db_cursor() as (_, cursor):
        cursor.execute(sql)
        data = cursor.fetchone()
        print(data[2])


def parseHtml():
    """Insert the entries of a saved HTML ``<dl>`` list into ``gd_area``.

    Each ``<dt>`` names a parent area (its text minus the last character is
    looked up in ``gd_area.name``); the children of the element that follows
    the ``<dt>`` are inserted with ``level`` 2, taking ``code`` from their
    ``adcode`` attribute and ``name`` from their text.
    """
    sql_select = " select * from gd_area where name =%s"
    sql_update = " insert into gd_area(code,name,level,parentId) values(%s,%s,2,%s) "
    path = '/Users/chanming/Desktop/a.html'
    with open(path, 'r') as htmlfile:
        html = bs_preprocess(htmlfile.read())
    soup = BeautifulSoup(html, 'lxml')
    pdl = soup.find('dl')
    with _db_cursor() as (db, cursor):
        for child in pdl.children:
            if not child or child.name != 'dt':
                continue
            parent_name = child.get_text()[0:-1]
            cursor.execute(sql_select, (parent_name,))
            result = cursor.fetchone()
            if result is None:
                # Without the parent row there is no parentId to store.
                print("no gd_area row named %s; skipping its children" % parent_name)
                continue
            sibling = child.next_sibling
            if sibling is None:
                continue
            for perLi in sibling.children:
                # result[1] is the second column of the parent row
                # (presumably its code -- confirm against the table schema).
                cursor.execute(
                    sql_update,
                    (perLi.get('adcode'), perLi.get_text(), result[1]),
                )
        db.commit()


def bs_preprocess(html):
    """Remove distracting whitespace and newline characters from ``html``.

    Leading/trailing whitespace of every line is stripped, newlines become
    single spaces, and whitespace directly before ``<`` or after ``>`` is
    dropped. Returns the cleaned string.
    """
    pat = re.compile(r'(^[\s]+)|([\s]+$)', re.MULTILINE)
    html = re.sub(pat, '', html)         # strip each line
    html = re.sub('\n', ' ', html)       # newlines -> spaces
    html = re.sub(r'[\s]+<', '<', html)  # whitespace before a tag
    html = re.sub(r'>[\s]+', '>', html)  # whitespace after a tag
    return html


def pullschoolInfo():
    """Fetch ``schoolUrl`` for city ``120000`` and print the decoded JSON."""
    s = schoolUrl.format(120000)
    r = requests.get(s)
    print(r.json())


def readJsonFile():
    """Load the saved district tree and store it via ``saveDis``."""
    path = "/Users/chanming/Desktop/dis.json"
    with open(path, 'r') as json_file:
        jsonObject = json.load(json_file)
    disList = jsonObject['districts']
    with _db_cursor() as (db, cursor):
        saveDis(disList, 0, cursor, db)
        db.commit()


def saveDis(disList, parent, cursor, db):
    """Recursively insert a district tree into ``gd_districts``.

    Args:
        disList: list of district dicts with ``name``, ``citycode``,
            ``adcode`` and (optionally) nested ``districts``; may be empty
            or ``None``.
        parent: value stored in the ``parent`` column for this level; nested
            levels receive the ``adcode`` of their parent district.
        cursor: open database cursor used for the inserts.
        db: connection the cursor belongs to; committed once per call.
    """
    update_sql = "insert into gd_districts (name,citycode,adcode,parent) values( %s,%s,%s,%s)"
    if not disList:
        return
    for dis in disList:
        # A falsy citycode is stored as an empty string.
        citycode = dis['citycode'] or ''
        cursor.execute(update_sql, (dis['name'], citycode, dis['adcode'], parent))
        saveDis(dis.get('districts'), dis['adcode'], cursor, db)
    db.commit()


def overWork():
    """Crawl the AMap place-text search for every ``LEVEL=2`` district.

    For each selected ``gd_districts`` row the third column is used as the
    ``city`` query parameter (presumably a citycode/adcode -- confirm against
    the table schema). All result pages for the keyword baked into the URL
    (URL-encoded "幼儿园") are stored through ``parsepois``; afterwards the
    city value is appended to ``c.txt`` and printed as a progress marker.
    """
    # NOTE(review): the API key is embedded in the URL as in the original.
    url = "https://restapi.amap.com/v3/place/text?s=rsv3&children=&key=e035d1ccf41f290d31b2251ee9fd6deb&offset=50&page={}&city={}&citylimit=true&extensions=all&language=undefined&callback=jsonp_381535_&platform=JS&logversion=2.0&appname=file%3A%2F%2F%2FUsers%2Fchanming%2FDesktop%2Fb.html&csid=D740A44A-DE94-4ECB-9478-1A811AF84359&sdkversion=1.4.10&keywords=%E5%B9%BC%E5%84%BF%E5%9B%AD"
    query_sql = " select * from gd_districts where LEVEL=2 "
    headers = {'Content-Type': 'application/json'}
    with _db_cursor() as (db, cursor):
        cursor.execute(query_sql)
        sqlList = cursor.fetchall()
        with open('/Users/chanming/Desktop/c.txt', 'a') as f:
            for sql in sqlList:
                city = sql[2]
                jsonObject = _fetch_jsonp(url.format(1, city), headers)
                pages = int(math.ceil(float(jsonObject['count']) / _PAGE_SIZE))
                parsepois(jsonObject['pois'], cursor, db)
                # Page 1 is already stored; fetch pages 2..pages.
                for page in range(2, pages + 1):
                    jsonObject = _fetch_jsonp(url.format(page, city), headers)
                    parsepois(jsonObject['pois'], cursor, db)
                f.write(city + ",")
                print(city)


def parsepois(poisList, cursor, db):
    """Insert one page of POI dicts into ``gd_kids``.

    Args:
        poisList: list of POI dicts from the search response; may be empty
            or ``None``. ``id`` and ``name`` are required, every other
            stored field falls back to ``''`` when missing or falsy.
        cursor: open database cursor used for the inserts.
        db: connection the cursor belongs to; committed once per call.
    """
    update_sql = "insert into gd_kids(idstr,name,address,tel,website,email,pcode,pname,citycode,cityname,adcode,adname)VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s) "
    if not poisList:
        return
    for poi in poisList:
        optional = [poi.get(field) or '' for field in _POI_OPTIONAL_FIELDS]
        cursor.execute(update_sql, [poi['id'], poi['name']] + optional)
    db.commit()


if __name__ == "__main__":
    # Other entry points, left disabled as in the original script:
    # saveProvince()
    # connectMysql()
    # queryMysql()
    # parseHtml()
    # pullschoolInfo()
    # readJsonFile()
    overWork()

转载于:https://my.oschina.net/u/3238650/blog/2253804

你可能感兴趣的文章
初识Continuation
查看>>
smooth l1
查看>>
ET–异步协程使用–TimerComponent篇
查看>>
Linux LVM学习总结——Insufficient Free Extents for a Logical Volume
查看>>
智课雅思词汇---二十一、名词性后缀acity是什么意思
查看>>
JavaWeb 返回json数据的两种方式
查看>>
(转)Java 详解 JVM 工作原理和流程
查看>>
关于如何获得数据库数据变化的情况(比定时查询方便多了)
查看>>
阿里员工都是这样排查Java问题的,附工具单(转)
查看>>
用flutter写一个精美的登录页面
查看>>
[转]Docker php extensions gd
查看>>
Java Program Mapping GB2312 to Unicode
查看>>
C语言标准中的逻辑位移和算术位移
查看>>
查看当前运行的SQL语句
查看>>
【Python】opencv显示图像
查看>>
Web配置文件(web.config)简介
查看>>
如何培养员工的团队合作精神
查看>>
POJ 1151 Atlantis (线段树)
查看>>
在sqlserver中如何根据字段名查找字段所在的表
查看>>
quality center 11备份最佳方案测试通过可用
查看>>