本文于 `r format(Sys.Date(), "%Y-%m-%d")` 更新。如发现问题或者有建议,欢迎提交 Issue。
中台数据提取
<!-- 开始搞中台的函数,关键是要一个个拆开来,真烦。 -->
<!-- 然后开始JOIN,真累。 -->
<!-- 但是有方向了! -->
# coding=utf8
import requests
import pandas as pd
import json
from model.baichuan.func.utils import config
import的包没什么变化。
- desc:
- doc:
- url_zhongtai:
中台也有config的。
- 测试环境的hosts:
172.17.4.29和 readapi.user.ppdaicorp.com。
class函数
class UserReadService:
    """Bundle the settings used to query the user read API.

    The constructor only stores its arguments on the instance; no request
    is sent here.
    """

    def __init__(self,
                 url=config['url_zhongtai'],
                 data=defaultQueryRequestData,
                 headers=defaultQueryRequestHeaders,
                 contentStr=defaultContentStr,
                 timeout=10):
        """Store the endpoint, request templates, headers and timeout.

        Args:
            url: API endpoint URL; defaults to ``config['url_zhongtai']``.
            data: request body template; defaults to
                ``defaultQueryRequestData``.
            headers: HTTP headers sent with each request; defaults to
                ``defaultQueryRequestHeaders``.
            contentStr: template for the ``content`` part of the request;
                defaults to ``defaultContentStr``.
            timeout: request timeout, 10 by default.
        """
        # Where and how to send the request.
        self.url = url  # API endpoint URL
        self.headers = headers
        self.timeout = timeout

        # Templates that are filled in per query.
        self.data = data
        self.contentStr = contentStr
这里class UserReadService表达一类函数。
class UserReadService:
    """Bundle the settings used to query the user read API.

    The constructor only stores its arguments on the instance; no request
    is sent here.
    """

    def __init__(self,
                 url=config['url_zhongtai'],
                 data=defaultQueryRequestData,
                 headers=defaultQueryRequestHeaders,
                 contentStr=defaultContentStr,
                 timeout=10):
        """Store the endpoint, request templates, headers and timeout.

        Args:
            url: API endpoint URL; defaults to ``config['url_zhongtai']``.
            data: request body template; defaults to
                ``defaultQueryRequestData``.
            headers: HTTP headers sent with each request; defaults to
                ``defaultQueryRequestHeaders``.
            contentStr: template for the ``content`` part of the request;
                defaults to ``defaultContentStr``.
            timeout: request timeout, 10 by default.
        """
        # Where and how to send the request.
        self.url = url  # API endpoint URL
        self.headers = headers
        self.timeout = timeout

        # Templates that are filled in per query.
        self.data = data
        self.contentStr = contentStr
这个地方的
`defaultQueryRequestData`、`defaultQueryRequestHeaders`、`defaultContentStr`
都是事先定义好的一大串内容。
- oop - Python `__init__` and `self` what do they do? - Stack Overflow 所以这里的 `self` 就是对实例自身的引用吗?不是特别懂,但是有个有趣的例子。下面是 Python 2 的例子。
<!-- -->
class MyClass(object):
    """Demonstrate a class attribute and an instance attribute sharing a name."""

    # Class attribute: reachable as MyClass.i.
    i = 123

    def __init__(self):
        # Instance attribute with the same name; it is what a.i resolves to.
        self.i = 345


a = MyClass()
# print(...) with a single argument runs on both Python 2 and Python 3.
print(a.i)        # 345 -- the value assigned in __init__
print(MyClass.i)  # 123 -- the class attribute is unchanged
以函数为例。
def getPageUserData(self, users, options, pageNum, pageSize=200):
    """Fetch one page of user records and return them as a DataFrame.

    Args:
        users: collection of user ids. ``str(users)[1:-1]`` is spliced into
            the request, so this is presumably a list -- confirm with callers.
        options: name of the data set to query; also the key under which
            the response stores the result.
        pageNum: page number to request.
        pageSize: number of records per page, 200 by default.

    Returns:
        ``(df, pages)``: ``df`` holds the non-empty records with lower-cased
        column names, ``pages`` is the page count reported by the API.

        NOTE(review): when ``users`` is empty a bare empty DataFrame is
        returned instead of a tuple. Kept as-is for backward compatibility;
        callers that unpack two values must handle that case.

    Raises:
        Exception: when the response ``code`` is not 0; the message is the
            API's ``message`` field.
    """
    if len(users) == 0:
        return pd.DataFrame()

    # Fill the content template first, then embed it in the request body.
    contents = self.contentStr % (
        options, str(users)[1:-1], str(pageNum), str(pageSize))
    data = self.data % (options, contents)

    # The core of the method: a single POST whose text body is JSON.
    response_text = requests.post(
        self.url, data=data, headers=self.headers, timeout=self.timeout).text
    request_data = json.loads(response_text)

    if request_data['code'] != 0:
        raise Exception(request_data['message'])

    result = request_data['data'][options]
    pages = result['pageInfo']['pages']

    # Drop empty records so they do not become all-NaN rows.
    records = [ele for ele in result['data'] if len(ele) != 0]
    df = pd.DataFrame.from_dict(records)
    if len(df) != 0:
        # Lower-case the column names once, as a concrete list rather than
        # a lazy map object.
        df.columns = [name.lower() for name in df.columns]
    return df, pages
重点
这是最核心的函数:`request_data = requests.post(self.url,data = data,headers = self.headers, timeout=self.timeout).text`。
经陈建提醒,这个接口函数,最重要的就是这句话:`request_data = requests.post(self.url,data = data,headers = self.headers, timeout=self.timeout).text`。其他都是在这个基础上搞的,不要跑偏了。要是跟大家讲接口的东西,这个就是最重要的了!
举例
# Example request body written out as a plain string.
# NOTE(review): the literal is wrapped in HTML comment markers, which are part
# of the string as written -- confirm whether they belong in the payload.
data = '''
<!-- {"appid":"cert01","async":0,"content":{"userdetailsinfo":{"conditions":{"userid":{"operators":"EQUAL","value":"65"}},"order":{"name":"userid","regular":"desc"},"page":{"pageNum":1,"pageSize":11}}},"options":"userdetailsinfo","userid":"65"} -->
'''
# Header declaring the request body as JSON.
headers = {"Content-type":"application/json"}
print(data)
这里要字符串化,否则data就是dict格式了。
当然也可以先写成dict,再用json.dumps转成json字符串。
# Serialize the payload to a JSON string.
# NOTE(review): data1 is not defined in the visible code; presumably it is the
# dict form of the request payload -- confirm.
data1 = json.dumps(data1)
# Inspect the type of the serialized payload.
type(data1)
格式就变成str了。
# POST the JSON string to the queryData endpoint and keep the response body
# as text.
request_data = requests.post('http://readapi.user.ppdaicorp.com/queryData',data = data1,headers = headers, timeout=10).text
print(request_data)
<!-- ``` -->
<!-- {"code":0,"message":"success","version":null,"data":{"userdetailsinfo":{"data":[{"userid":65,"username":"q1waq1wa","borrowerrole":1,"regsourceid":1,"realname":"王杨","mobilephone":"15000649212","qq":"aaa","educationid":1,"idnumber":"34120219881226091X","marriagestatusid":1,"residencetypeid":1,"residencephone":"aaa","residenceyears":123,"hasbuycar":1,"nciiccityid":123,"creationdate":"2016-09-09 17:48:25","lastupdatedate":"2016-09-09 17:48:25","regstepid":1,"registertypeid":1,"role":8,"picture":"aaa","residenceaddress":"aaa","gender":1,"age":1,"rating":"aaa","hassameflashuser":true,"currentlycityid":123,"lastcontactmethod":123,"lastcontactdate":"2016-09-09 17:48:25","provinceid":123,"cityid":123,"districtid":123,"assessamounttype":123,"weixinnumber":"aaa","assets":"aaa","inserttime":"2007-06-18 14:28:50","updatetime":"2017-08-14 09:41:34","commonemail":"aaa"}],"pageInfo":{"pageNum":1,"pageSize":11,"total":0,"pages":0,"size":1}}}} -->
<!-- ``` -->
# Parse the response text into Python objects.
request_data = json.loads(request_data)
print(request_data)
# Build a DataFrame directly from the parsed response.
pd.DataFrame.from_dict(request_data)
就好了。
`pd.DataFrame(pd.DataFrame.from_dict(request_data).iloc[0,1]['data'])` 的结果就是很好的结果。