Click here to Skip to main content
15,867,686 members
Please Sign up or sign in to vote.
1.00/5 (1 vote)
See more:
I'm trying to crawl weather data from a website by simulating the web browser and the Windows proxy configuration. My web browser needs its network proxy settings configured before it can access internet websites. I have the proxy IP and port. I want to reproduce, in Node.js, the process the web browser follows when it accesses a website.
I use the Node.js http, https and tls modules.
I have done this before in C, so I am familiar with the HTTP request procedure:
1. The client needs to establish a TCP connection with the proxy.
2. The client sends an HTTP CONNECT request, and the proxy may respond with "connection established".
3. The client starts the TLS negotiation over the proxy connection: saying hello, exchanging cipher suites and so on. I'm not entirely clear on the details, but OpenSSL does most of the job.
4. The client sends and receives through the read BIO and the write BIO provided by OpenSSL. Here you can send your GET/POST requests just as you would over plain HTTP.

I've been reading the Node.js documentation, and it seems the http, https and tls modules have something to do with my task. I also tried the request package, which seems to be a wrapper around the modules mentioned above, so I dropped it.
I have written a small piece of code, shown below.

What I have tried:

JavaScript
//crawler by proxy.
var http=require('http')
var https=require('https')
var tls=require('tls')
//const request=require('request') 
// Options for the initial request to the proxy.
// NOTE(review): `path` appears only inside `headers` below, so it is passed as a
// header entry; no top-level `path` option is set. Node's documented CONNECT usage
// puts the tunnel target in `path` as 'host:port' - confirm against the http docs.
var opt={
    host:'172.254.18.15',   // proxy IP; the value here is a local-network proxy address.
    port: 8080,             // proxy service port.
    keepAlive:true,
    method:'CONNECT',
    headers:{
        host:'www.163.com', // site to crawl; over plain http it redirects to https links.
        path:'/',
        'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.71 Safari/537.36'
    }
}

// First: send the HTTP CONNECT request.
// Second: try to reuse the socket of that connection and start the SSL/TLS negotiation over it.
// NOTE(review): for a CONNECT request the reply is presumably delivered through the
// 'connect' event below, so this response callback may never run - confirm.
var req=http.request(opt, function(res) {
    console.log("Got response: " + res.statusCode)
    if(res.statusCode===301||res.statusCode===302) {
        var raw_headers=res.rawHeaders;
        // indexOf returns -1 when no 'Location' entry exists, so the +1 then selects
        // index 0 (the first raw header entry) instead of a redirect target.
        console.log('Please redirect '+raw_headers[raw_headers.indexOf('Location')+1]);
    }
    
    // Accumulate the response body and print it once the stream ends.
    let body='';
    res.on('data',function(d){
        body += d
    }).on('end', function(){
       console.log(body)
    });
}).on('connect',(res,socket,head)=>{
    // This handler is an arrow function, so `this` is taken from the enclosing scope
    // and is not the request object; `this.method` is therefore not the request's method.
    console.log('request method '+this.method)
    console.log("Got response: " + res.statusCode)
    console.log(res.rawHeaders)
    console.log('connected.')
    console.log(head.toString())
    
    // How can the connection socket of the http request be reused for https?
    // The socket handed to the 'connect' handler is passed on to https.request.
    // NOTE(review): the host here is www.baidu.com, while opt.headers above names
    // www.163.com - the TLS peer and the requested tunnel target look mismatched.
    let tls_opts={
        socket:socket,
        method:'GET',
        host:'www.baidu.com',
        path:'/'
    }
    let tls_req=https.request(tls_opts,(res)=>{
        let tls_data=''
        res.on('data',(data)=>{
            tls_data+=data
            // NOTE(review): inside nested arrow functions `this` is still the outer
            // scope's `this`, not the request or response; this call likely throws
            // on the first chunk - confirm.
            this.end()
        }).on('end',()=>{
            console.log(tls_data)
            console.log('disconnected.')
        })
    }).on('error',(err)=>{
        console.log('tls error:'+err);
    })
    tls_req.end()
}).on('error', function(e) {
    console.log("Got error: " + e.message);
// No callback is passed to setTimeout and no 'timeout' listener is attached in this
// snippet, so nothing here reacts when the 3000 ms timeout fires.
}).setTimeout(3000)
// `req` is the value returned by the chained setTimeout call (presumably the request itself).
req.end()

It did not work. In my case, the CONNECT request was sent and the proxy responded with 'connection established', but the TLS step failed. I'm not familiar with Node.js, and most of the code on the internet does not show the details of going through a proxy.
Posted
Updated 5-Feb-23 15:16pm
v2

1 solution

I'm posting my working code here for the record.
The trick is the https.Agent object.
JavaScript
//crawler by proxy.
var http=require('http')
var https=require('https')
//var tls=require('tls')
//const request=require('request') 
// Crawl an HTTPS site through an HTTP proxy.
//
// Step 1: send an HTTP CONNECT request to the proxy, asking it to open a tunnel
//         to the target host.
// Step 2: once the proxy confirms the tunnel, reuse that same socket and run the
//         TLS handshake with the *target host* (not the proxy) through it, then
//         send the real HTTPS request.

// The target is defined once so the CONNECT request and the TLS request
// always agree on which host the tunnel leads to.
const TARGET_HOST = 'www.163.com';
const TARGET_PORT = 443;

// Give up if either the proxy or the target stays silent for this long.
const TIMEOUT_MS = 3000;

const opt = {
    host: '208.70.77.222',                 // proxy IP (example value).
    port: 1994,                            // proxy service port.
    method: 'CONNECT',
    path: `${TARGET_HOST}:${TARGET_PORT}`, // for CONNECT the request target is "host:port".
    headers: {
        'Proxy-Connection': 'keep-alive',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.71 Safari/537.36'
    }
};

// No response callback is registered: for a CONNECT request Node reports the
// proxy's reply through the 'connect' event handled below.
const req = http.request(opt);

req.on('connect', (res, socket, head) => {
    console.log(`request method ${req.method}`);
    console.log(`Got CONNECT response: ${res.statusCode}`);
    console.log(res.rawHeaders);
    console.log(head.toString());

    // Anything other than 200 means the proxy did not open the tunnel
    // (e.g. it wants authentication); starting TLS on it would only fail later
    // with a confusing error, so stop here and release the socket.
    if (res.statusCode !== 200) {
        console.log('Proxy refused the tunnel.');
        socket.destroy();
        return;
    }
    console.log('Connected.');

    // The agent hands the already-connected tunnel socket to the TLS layer
    // instead of opening a new TCP connection to the target.
    const agent = new https.Agent({
        socket: socket // reuse the tunnel established by CONNECT.
        // rejectUnauthorized: false // disables certificate verification; only for
        //                           // testing against a server with an untrusted CA.
    });

    // https.request(...) followed by .end() is used here; the original author
    // reported that https.get misbehaved in this setup (not verified).
    const tunnelled = https.request({
        host: TARGET_HOST,
        port: TARGET_PORT,
        path: '/',
        method: 'GET',
        agent: agent
    }, (reply) => {
        console.log(reply.rawHeaders);
        // Collect raw chunks and decode once at the end so multi-byte UTF-8
        // sequences split across chunks are decoded correctly.
        const chunks = [];
        reply.on('data', (chunk) => {
            chunks.push(chunk);
        }).on('end', () => {
            console.log(Buffer.concat(chunks).toString('utf8'));
        });
    });

    tunnelled.on('error', (e) => {
        console.log('https failed.');
        console.log(e);
    });

    // Abort instead of hanging forever if the target stops answering.
    tunnelled.setTimeout(TIMEOUT_MS, () => {
        tunnelled.destroy(new Error(`no response from ${TARGET_HOST} within ${TIMEOUT_MS} ms`));
    });

    tunnelled.end();
});

req.on('error', (e) => {
    console.log(`Got error: ${e.message}`);
});

// A timeout without a handler does nothing; destroy the request so the
// failure surfaces through the 'error' handler above.
req.setTimeout(TIMEOUT_MS, () => {
    req.destroy(new Error(`proxy did not answer CONNECT within ${TIMEOUT_MS} ms`));
});

req.end();
 
Share this answer
 

This content, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)



CodeProject, 20 Bay Street, 11th Floor Toronto, Ontario, Canada M5J 2N8 +1 (416) 849-8900