【VBA研究】⽤XMLHTTP的Post功能抓取数据
作者:
我前⼀阵⼦⽤VBA做了个⼯具,⽤XMLHTTP的Get功能抓取城市间距离。现在我想⽤⽤XMLHTTP的Post功能抓取邮件轨迹。抓取数据是⽤Get还是Post,取决于⽹站提交参数的⽅法。
1、通过分析(⽤fiddler),邮件轨迹查询⽹站是⽤post提交参数的。如下图:
上图中“Entity”内容⽤于设置包头,点击“TextView”可以看到传输的参数内容,邮件号码,如下图:
抓取数据的代码如下:
Sub tt()
Dim HttpReq As Object
Dim pdata, http As String
Set HttpReq = CreateObject("MSXML2.XMLHTTP.6.0")
'轨迹头部数据,⽹址⽤xxx屏蔽
http = "x/querypush-traln/qps/qpswaybilltraceinternal/queryTraceByTrace_no/"
pdata = "trace_no=1044905987232"
HttpReq.Open "Post", http, False
HttpReq.setRequestHeader "Content-Length", Len(pdata)
'HttpReq.setRequestHeader "CONTENT-TYPE", "application/x-www-form-urlencoded"
HttpReq.setRequestHeader "CONTENT-TYPE", "application/x-www-form-urlencoded; charset=utf-8"
HttpReq.send pdata '"trace_nos=1194359346482"
Do adyState = 4
DoEvents
Loop
If HttpReq.Status = 200 Then
Debug.sponseText
End If
End Sub
2、返回内容是个json结构的数据,可以⽤fiddler的查看返回内容,点击“TextView”:
点击“JSON”可以看到数据解析结果:
但vba解析的时候内容为空,对⽐以前返回json结构的数据,发现这个数据少了个名称,因为定义JS函数时,指定了⼀个json结构数据的名称,即jscode = "function json(s,i) { return eval('(' + s + ').traces[' + i + ']'); }"中的traces名称。
加上后就可以解析了,即:
'返回数据要成为标准的json结构,还需要在外⾯加⼀层数据名称
buf = "{""traces"":" & sponseText & "}"
kk = get_trace(buf)
get_trace函数就是⽤来解析json数据的,代码如下:
Function get_trace(mystring As String) As Integer
Dim objJSx As Object, objJSy As Object
Dim m1, m2, n, j As Integer
Dim source, level, kind, sm As String
On Error Resume Next
Set objJSx = CreateObject("ScriptControl") '调⽤MSScriptControl.ScriptControl对象将提取的变量⽂本运算形成对象集合 objJSx.Language = "JavaScript" '测试发现JavaScript、javascript、JScript都可以表⽰JavaScript语⾔
'定义⼀个JS函数,通过计算表达式的⽅式引⼊JSON数据并解析
jscode = "function json(s,i) { return eval('(' + s + ').traces[' + i + ']'); }"
objJSx.AddCode jscode
TT = "否"
For n = 1 To 100
If objJSx.Run("json", mystring, n - 1) = "" Then Exit For
Set objJSy = objJSx.Run("json", mystring, n - 1)
For j = 1 To 11
TraceInfo(n, j) = ""
Next j
TraceInfo(n, 1) = aceNo
TraceInfo(n, 2) = objJSy.opCode
TraceInfo(n, 3) = objJSy.opTime
TraceInfo(n, 4) = objJSy.opName
TraceInfo(n, 6) = objJSy.opOrgCode
TraceInfo(n, 7) = objJSy.opOrgSimpleName
TraceInfo(n, 8) = objJSy.operatorNo
TraceInfo(n, 9) = objJSy.operatorName
TraceInfo(n, 10) = objJSy.level
TraceInfo(n, 11) = objJSy.source
sm = objJSy.desc
'剔除数据中的HTML部分
Do While InStr(sm, "<") > 0
m1 = InStr(sm, "<")
m2 = InStr(sm, ">")
If m2 > 0 Then
If Mid(sm, m1, 3) = "/br" Then
sm = Left(sm, m1 - 1) & " " & Right(sm, Len(sm) - m2)
Else
sm = Left(sm, m1 - 1) & Right(sm, Len(sm) - m2)
End If
Else
Exit Do
End If
Loop
TraceInfo(n, 5) = sm
If objJSy.opCode = "704" Then TT = "是"
Next n
get_trace = n - 1
End Function
3、实际使⽤的代码
Public Sub get_data()
'根据⼯作表中的查询语句读取数据
Dim HttpReq As Object
Dim i, k, kk, lineno, row1 As Long
Dim Mail, pdata, tbhead As String, buf As String
Dim arr_head
lineno = [A65536].End(xlUp).Row '⾏数,也是邮件号码数量
Range("B2:B" & lineno).ClearContents
Range("B2:B" & lineno).ClearContents
'lineno = ActiveSheet.UsedRange.Rows.Count
Set HttpReq = CreateObject("MSXML2.XMLHTTP.6.0")
resize函数vba
'轨迹头部数据
'http = "x/querypush-traln/qps/qpswaybilltraceinternal/queryCurrentTraceByTrace_nos/" 'pdata = "trace_nos="
'轨迹数据
http = "x/querypush-traln/qps/qpswaybilltraceinternal/queryTraceByTrace_no/"
'pdata = "trace_no="
row1 = 2
maxrow = Sheets("查询结果").UsedRange.Rows.Count
If maxrow >= 1 Then
Sheets("查询结果").Range("A1:L" & maxrow).ClearContents
End If
tbhead = "邮件号码操作码操作时间处理动作详细说明机构代码机构名称操作员代码操作员姓名级别来源" arr_head = Split(tbhead, " ") '下标从0开始
Sheets("查询结果").Cells(1, 1).Resize(1, UBound(arr_head) + 1) = arr_head
For i = 2 To lineno
Mail = Trim(Sheets("邮件号码").Cells(i, 1))
If Mail = "" Then Exit For
If Len(Mail) = 13 Then
HttpReq.Open "Post", http, False
pdata = "trace_no=" & Mail
HttpReq.setRequestHeader "Content-Length", Len(pdata)
HttpReq.setRequestHeader "CONTENT-TYPE", "application/x-www-form-urlencoded"
HttpReq.send pdata 'pdata = "trace_no=1194359346482"
Do adyState = 4
DoEvents
Loop
'AllResponseHeaders
'Debug.sponseText
'返回数据要成为标准的json结构,还需要在外⾯加⼀层数据名称
buf = "{""traces"":" & sponseText & "}"
kk = get_trace(buf)
Sheets("邮件号码").Cells(i, 2) = TT
If kk > 0 Then
For k = kk To 1 Step -1
If CInt(TraceInfo(k, 10)) <= Range("E1") Then
For j = 1 To 11
Sheets("查询结果").Cells(row1, j) = TraceInfo(k, j)
Next j
row1 = row1 + 1
End If
Next k
Else
Sheets("邮件号码").Cells(i, 2) = "Err"
Sheets("查询结果").Cells(row1, 1) = Mail
Sheets("查询结果").Cells(row1, 2) = "Err"
Sheets("查询结果").Cells(row1, 4) = sponseText
row1 = row1 + 1
delay (9 * Rnd + 1) '出错了,说明你还是⼲快了,随机后延时1-10秒,看运⽓了。
End If
'If CInt(i / 10) * 10 = i Then
Application.StatusBar = "完成:" & Round(i * 100 / lineno, 2) & "%"
DoEvents
'End If
delay (Rnd + 0.25) '总部领导说了,接⼝是⼤家⽤的,你⼀个⼈不能⽤太多,此处延时0.5秒,降降速度。 Else
Sheets("邮件号码").Cells(i, 2) = "异常"
End If
Next i
Application.StatusBar = "就绪"
Sheets("查询结果").Activate
msg = MsgBox("邮件批量查询完毕,共查询" & i - 2 & "个邮件!", vbOKOnly, "AHEMS:iamlaosong") End Sub
版权声明:本站内容均来自互联网,仅供演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系QQ:729038198,我们将在24小时内删除。
发表评论