
Recently we received quite a few requests from people wanting to use Ghostlab for their website and webapp testing from behind corporate proxies. Unfortunately, as Ghostlab’s server component is built on node.js and the HTTP client in node.js assumes that a direct connection to the Web can be made (i.e., node.js ignores the system’s proxy settings), we had to build our own support to proxy requests in node.
There are some other blog posts on how to do a request through a proxy for HTTP, so adding that was easy. HTTPS, however, is not covered nearly as well, so that’s what this little write-up is mainly about.
HTTP
Let’s start with the easy part. Assume you want to request the content of http://www.vanamco.com/ghostlab. Requesting it through a proxy, we would do something like:
- Connect to the proxy server (e.g. with
telnet 192.168.5.8 3128
if 192.168.5.8 is the IP of your proxy server running on port 3128),
- do the GET request using the full URL as the path:
GET http://www.vanamco.com/ghostlab/ HTTP/1.1
Or with node.js:
var Http = require('http');

// Plain HTTP through a proxy: connect to the proxy itself and hand it the
// absolute URL of the resource as the request path.
var req = Http.request({
  host: '192.168.5.8', // proxy IP
  port: 3128, // proxy port
  method: 'GET',
  path: 'http://www.vanamco.com/ghostlab/', // full URL as path (http, not https)
  headers: {
    // Host must name the target server; by default node would send the proxy's address.
    Host: 'www.vanamco.com'
  }
}, function (res) {
  res.on('data', function (data) {
    console.log(data.toString());
  });
});

// Without an 'error' listener an unreachable proxy would crash the process.
req.on('error', function (err) {
  console.error('Request through proxy failed: ' + err.message);
});

req.end();
HTTPS
While the above code is the correct way to do an HTTP GET request through a proxy, it won’t work if you try to use HTTPS. For instance, if you replace the path with https://www.twitter.com
, your proxy server might say something like
“Unsupported Request Method and Protocol. Squid does not support all request methods for all access protocols.”
In fact, you actually wouldn’t want the proxy to be able to do this; you wouldn’t want it to decrypt and send you back your unencrypted data. Instead, proxies support the CONNECT
request, which will establish a tunnel to the remote server, so the data will remain encrypted.
When you open a telnet session (telnet 192.168.5.8 3128
) and do a CONNECT
request
CONNECT twitter.com:443 HTTP/1.1
the proxy will answer with
HTTP/1.0 200 Connection established
This means that the proxy has set up the tunnel to the host. Now you can communicate with the host through the tunnel, i.e., you’ll have to start by sending your encrypted request (which is a bit hard to do with telnet, so the next code listing shows how to do it with node.js).
var Http = require('http');
var Tls = require('tls');

// Ask the proxy to open a tunnel to twitter.com:443.
var req = Http.request({
  host: '192.168.5.8', // proxy IP
  port: 3128, // proxy port
  method: 'CONNECT',
  path: 'twitter.com:443'
});

req.on('connect', function (res, socket, head) {
  // The 'connect' event is emitted for every reply to a CONNECT request;
  // only a 200 status means that the tunnel has actually been established.
  if (res.statusCode !== 200) {
    console.error('Proxy refused the tunnel: HTTP ' + res.statusCode);
    socket.destroy();
    return;
  }

  // Run TLS on top of the tunnelled socket; `host` is passed so that the
  // certificate is checked against twitter.com.
  var cts = Tls.connect({
    host: 'twitter.com',
    socket: socket
  }, function () {
    // HTTP header lines end with CRLF and an empty line terminates the header block.
    cts.write('GET / HTTP/1.1\r\nHost: twitter.com\r\n\r\n');
  });

  cts.on('data', function (data) {
    console.log(data.toString());
  });

  // A failed handshake or broken tunnel is reported instead of crashing the process.
  cts.on('error', function (err) {
    console.error('TLS error: ' + err.message);
  });
});

// Emitted when the proxy itself cannot be reached.
req.on('error', function (err) {
  console.error('CONNECT request failed: ' + err.message);
});

req.end();
The node.js program does an HTTP CONNECT request to the proxy and listens for the connect
event. The event handler is passed an instance of net.Socket
which we can use as if we were directly communicating with the remote server. I.e., we need to start by sending the encrypted request and we’ll receive back the encrypted content.
To do the encryption and decryption, we use node.js’s TLS module. The tls.connect
method accepts an options argument which contains the socket we received from the connect event handler. Make sure to also include the host in the options, even though the node.js docs say the host will be ignored when you’re passing a socket. If you don’t pass the host, an error will be thrown, “Hostname/IP doesn’t match certificate’s altnames”. As a second argument, tls.connect
accepts a callback function (a listener to the secureConnect
event, really), in which we do our HTTP request (manually, in this snippet) by writing to the tls.CleartextStream
object returned by tls.connect
.
An HTTPS Proxy Agent
To make life a little easier and also benefit from node.js’s HTTPS client implementation, we can wrap the CONNECT
request and dealing with TLS into a https.Agent
so we only have to do this when doing an HTTPS request via a proxy:
var Https = require('https');

// The proxy-aware agent; it is the only proxy-specific piece of this request.
var agent = new HttpsProxyAgent({
  proxyHost: '192.168.5.8',
  proxyPort: 3128
});

// Regular HTTPS request options, plus the special agent.
var requestOptions = {
  host: 'twitter.com',
  port: 443,
  method: 'GET',
  path: '/',
  agent: agent
};

// Prints every chunk of the response body as it arrives.
function printBody(res) {
  res.on('data', function (chunk) {
    console.log(chunk.toString());
  });
}

var proxiedRequest = Https.request(requestOptions, printBody);
proxiedRequest.end();
In summary: in addition to a regular HTTPS request, you only need to instantiate an HttpsProxyAgent
and pass the instance in the agent
property of the request options.
To implement the HttpsProxyAgent
, we can reuse most of node.js’s implementation of the http.Agent
and https.Agent
. But we need to override two methods, addRequest
and createSocket
to handle the asynchronous nature of how we receive the socket (the original implementation assumes that createConnection
returns a socket), and so we can emit an error event on the request object when the connection to the proxy fails.
Here we go (Gist):
Update (March 23, 2015): The code below was updated to work with node v0.12.0. See Gist for more information.
var Util = require('util');
var Http = require('http');
var Https = require('https');
var Tls = require('tls');
/**
 * An https.Agent whose connections are tunnelled through an HTTP proxy by
 * means of a CONNECT request.
 *
 * @param {Object} options - Agent options; also forwarded to https.Agent.
 * @param {string} options.proxyHost - Host name or IP address of the proxy.
 * @param {number} options.proxyPort - Port the proxy is listening on.
 */
function HttpsProxyAgent(options) {
  Https.Agent.call(this, options);

  this.proxyHost = options.proxyHost;
  this.proxyPort = options.proxyPort;

  /**
   * Opens a tunnel to opts.host:opts.port through the proxy and starts TLS
   * on it. Unlike the stock createConnection, the socket is delivered
   * asynchronously through the callback.
   *
   * @param {Object} opts - Connection options; `host` and `port` are
   *     required, `servername` (used for SNI) is optional.
   * @param {Function} callback - Called as callback(err, tlsSocket); exactly
   *     one of the two arguments is non-null.
   */
  this.createConnection = function (opts, callback) {
    // do a CONNECT request
    var req = Http.request({
      host: options.proxyHost,
      port: options.proxyPort,
      method: 'CONNECT',
      path: opts.host + ':' + opts.port,
      headers: {
        host: opts.host
      }
    });

    req.on('connect', function (res, socket, head) {
      // 'connect' is emitted for any reply to CONNECT (e.g. 403 or 407 as
      // well); the tunnel only exists if the proxy answered with 200.
      if (res.statusCode !== 200) {
        socket.destroy();
        callback(new Error('Proxy refused CONNECT to ' + opts.host + ':' + opts.port +
          ' (status ' + res.statusCode + ')'), null);
        return;
      }

      // Report handshake failures through the callback; without a listener
      // the 'error' event would be thrown as an uncaught exception.
      var onHandshakeError = function (err) {
        callback(err, null);
      };

      var cts = Tls.connect({
        host: opts.host,
        // pass the server name on so that it is sent via SNI
        servername: opts.servername,
        socket: socket
      }, function () {
        // From here on, errors belong to whoever uses the socket.
        cts.removeListener('error', onHandshakeError);
        callback(null, cts);
      });

      cts.once('error', onHandshakeError);
    });

    // The proxy itself could not be reached.
    req.on('error', function (err) {
      callback(err, null);
    });

    req.end();
  };
}
Util.inherits(HttpsProxyAgent, Https.Agent);
/**
 * Gives `req` a socket, or queues it when the pool for its destination is full.
 * Almost verbatim copy of http.Agent.addRequest; the difference is that the
 * socket is handed to the request from a callback because it is created
 * asynchronously.
 *
 * @param {http.ClientRequest} req - The request waiting for a socket.
 * @param {Object} options - Request options; `host` and `port` are read,
 *     `localAddress` is optional.
 */
HttpsProxyAgent.prototype.addRequest = function (req, options) {
  // Pool key is host:port[:localAddress]. The request's URL path must not be
  // part of it, nor be passed on as createSocket's localAddress argument.
  var name = options.host + ':' + options.port;
  if (options.localAddress) name += ':' + options.localAddress;
  if (!this.sockets[name]) this.sockets[name] = [];
  if (this.sockets[name].length < this.maxSockets) {
    // if we are under maxSockets create a new one.
    this.createSocket(name, options.host, options.port, options.localAddress, req, function (socket) {
      req.onSocket(socket);
    });
  } else {
    // we are over limit so we'll add it to the queue.
    // NOTE(review): draining this queue is left to the inherited Agent - confirm it uses the same key.
    if (!this.requests[name])
      this.requests[name] = [];
    this.requests[name].push(req);
  }
};
/**
 * Creates a socket via createConnection, registers it in the pool under
 * `name` and passes it to `callback`.
 * Almost verbatim copy of http.Agent.createSocket, adapted to a
 * createConnection that delivers its socket asynchronously.
 *
 * @param {string} name - Pool key under which the socket is stored.
 * @param {string} host - Target host.
 * @param {number} port - Target port.
 * @param {string} localAddress - Copied into the connection options.
 * @param {Object} req - Optional request; its 'host' header, if any, determines
 *     the servername, and connection errors are emitted on it.
 * @param {Function} callback - Called with the socket once it is available.
 */
HttpsProxyAgent.prototype.createSocket = function (name, host, port, localAddress, req, callback) {
  var agent = this;

  var connectOptions = Util._extend({}, agent.options);
  connectOptions.port = port;
  connectOptions.host = host;
  connectOptions.localAddress = localAddress;

  // The servername defaults to the host; a 'host' header on the request takes
  // precedence, with everything from the first colon on stripped off.
  var hostHeader = req ? req.getHeader('host') : null;
  connectOptions.servername = hostHeader ? hostHeader.replace(/:.*$/, '') : host;

  agent.createConnection(connectOptions, function (err, socket) {
    if (err) {
      err.message += ' while connecting to HTTP(S) proxy server ' + agent.proxyHost + ':' + agent.proxyPort;
      // Without a request there is nobody to report the error to, so it is thrown.
      if (!req) {
        throw err;
      }
      req.emit('error', err);
      return;
    }

    var pool = agent.sockets[name] || (agent.sockets[name] = []);
    pool.push(socket);

    function handleFree() {
      agent.emit('free', socket, host, port, localAddress);
    }

    function handleClose() {
      // A closed socket is dropped from the pool; closing a socket is the
      // regular way of removing it, and socket errors end in 'close' as well.
      agent.removeSocket(socket, name, host, port, localAddress);
    }

    function handleAgentRemove() {
      // Needed for cases such as HTTP 'upgrade' (WebSockets), where the socket
      // has to leave the pool because it would otherwise be locked up indefinitely.
      agent.removeSocket(socket, name, host, port, localAddress);
      socket.removeListener('close', handleClose);
      socket.removeListener('free', handleFree);
      socket.removeListener('agentRemove', handleAgentRemove);
    }

    socket.on('free', handleFree);
    socket.on('close', handleClose);
    socket.on('agentRemove', handleAgentRemove);

    callback(socket);
  });
};
Image credit: hiroze
// Page script: configures the `hljs` syntax highlighter and has it run on load.
// The string delimiters must be plain ASCII quotes; typographic quotes are a syntax error.
hljs.configure({
  tabReplace: ' ',
  useBr: true
});
hljs.initHighlightingOnLoad();