爬虫:如何破解表单提交参数(Form Data)的网站,模拟发送HTTP的POST表单提
交方式的请求
这时打开F12检查页面元素,发现此处有一参数,位于Form Data下:
此时需要模拟表单提交的方式来请求页面,具体代码如下:
package awler;
import org.apache.http.*;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.message.BasicNameValuePair;
import org.apache.http.util.EntityUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
* Created by tyx on 2017/8/2.
*/
public class HttpClient {
public static String httpPostWithJSON(String url) throws Exception {
HttpPost httpPost = new HttpPost(url);
CloseableHttpClient client = ateDefault();
String respContent = "";
//        表单⽅式
Map<String,List<NameValuePair>> map = new HashMap<>();
//        NameValuePair⼀般⽤于Java向Http发送post请求
for (int i=1;i<=5;i++){
List<NameValuePair> nvps = new ArrayList<>();
nvps.add(new BasicNameValuePair("__VIEWSTATE","VgMkepRUCq07T5PvxUy+7a7yW1zAP9DkZlv/
EW6v4u9tbu3S5P3berU8P6LRLCBCdJ/oL0ijkPiT+zC+9TGovLtmCwbPX57Ywm6pklZlghaeq0qBvt8FgbxkDYRKzqvo1KVKWDLzfkS8            nvps.add(new BasicNameValuePair("__EVENTTARGET","ctl00$ContentPlaceHolder1$AspNetPager1"));
nvps.add(new BasicNameValuePair("__EVENTARGUMENT",i+""));
map.put(i+"",nvps);
}
//向对⽅服务器发送Post请求
try {
//将参数进⾏封装,提交到服务器端
for (String key : map.keySet()){
httpPost.setEntity(new (key),"UTF8"));
CloseableHttpResponse httpResponse = ute(httpPost);
//                200为成功访问的返回值
StatusLine().getStatusCode() == 200) {
Document doc = Jsoup.Entity()));
Elements elements = doc.select("table.data-list").select("a");
for (Element element : elements){
System.out.());
}
//                    Header[] headers = AllHeaders();
//                    for (Header header : headers) {
//                        System.out.Name() + ": " + Value());
//                    }
}
}
} catch (Exception e) {
e.printStackTrace();
} finally {
httpPost.abort();      //释放资源
}
return respContent;
}
public static void main(String[] args) throws Exception {
String result = httpPostWithJSON("113.108.219.40/Dop/Open/EnterpriseList.aspx");
System.out.println(result);
}
}
其中表单提交参数的方式一般都是post,因此使用NameValuePair,这里通过Jsoup解析后返回出前五页公司的名字
欢迎大家一起交流爬虫方面的经验

版权声明:本站内容均来自互联网,仅供演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系QQ:729038198,我们将在24小时内删除。