# $Id: HTTPD.pm 2155 2006-11-12 07:01:34Z teknikill $
# Filter::HTTPD Copyright 1998 Artur Bergman .
# Thanks go to Gisle Aas for his excellent HTTP::Daemon. Some of the
# get code was copied out if, unfortunately HTTP::Daemon is not easily
# subclassed for POE because of the blocking nature.
# 2001-07-27 RCC: This filter will not support the newer get_one()
# interface. It gets single things by default, and it does not
# support filter switching. If someone absolutely needs to switch to
# and from HTTPD filters, they should submit their request as a patch.
package POE::Filter::HTTPD;
use strict;
use POE::Filter;
use vars qw($VERSION @ISA);
$VERSION = do {my($r)=(q$Revision: 2155 $=~/(\d+)/);sprintf"1.%04d",$r};
@ISA = qw(POE::Filter);
sub BUFFER () { 0 }
sub TYPE () { 1 }
sub FINISH () { 2 }
sub HEADER () { 3 }
sub CLIENT_PROTO () { 4 }
sub HEADER_ONLY () { 5 }
sub CHUNKED_RES () { 6 }
sub EXTRA_METHODS () { 7 }
use Carp qw(croak);
use HTTP::Status qw( status_message RC_BAD_REQUEST RC_OK RC_LENGTH_REQUIRED );
use HTTP::Request ();
use HTTP::Response ();
use HTTP::Date qw(time2str);
use URI ();
my $HTTP_1_0 = _http_version("HTTP/1.0");
my $HTTP_1_1 = _http_version("HTTP/1.1");
#------------------------------------------------------------------------------
sub new {
my $type = shift;
my %opts = @_;
my $self = [
'', # BUFFER
0, # TYPE
0, # FINISH
undef, # HEADER
undef, # CLIENT_PROTO
( $opts{headers_only} ? 1 : 0 ), # HEADER_ONLY
0, # CHUNKED_RES
( $opts{extra_methods} ? $opts{extra_methods} : {} ), # EXTRA_METHODS
];
bless $self, $type;
$self;
}
#------------------------------------------------------------------------------
sub get_one_start {
my ($self, $stream) = @_;
# return if ( $self->[FINISH] ); # XXX check this for correctness!
$stream = [ $stream ] unless ( ref( $stream ) );
$self->[BUFFER] .= join( '', @$stream );
}
sub get_one {
my ($self) = @_;
return ( $self->[FINISH] ) ? [] : $self->get( [] );
}
sub get {
my ($self, $stream) = @_;
# Need to check lengths in octets, not characters.
use bytes;
# Why?
local($_);
# Sanity check. "finish" is set when a request has completely
# arrived. Subsequent get() calls on the same request should not
# happen. -><- Maybe this should return [] instead of dying?
if ($self->[FINISH]) {
# This works around a request length vs. actual content length
# error. Looks like some browsers (mozilla!) sometimes add on an
# extra newline?
# return [] unless @$stream and grep /\S/, @$stream;
my @dump;
my $offset = 0;
$stream = $self->[BUFFER].join("", @$stream);
while (length $stream) {
my $line = substr($stream, 0, 16);
substr($stream, 0, 16) = '';
my $hexdump = unpack 'H*', $line;
$hexdump =~ s/(..)/$1 /g;
$line =~ tr[ -~][.]c;
push @dump, sprintf( "%04x %-47.47s - %s\n", $offset, $hexdump, $line );
$offset += 16;
}
return [
$self->_build_error(
RC_BAD_REQUEST,
"Did not want any more data. Got this:" .
"" . join("", @dump) . "
"
)
];
}
# Accumulate data in a framing buffer.
$self->[BUFFER] .= join('', @$stream);
# If headers were already received, then the framing buffer is
# purely content. Return nothing until content-length bytes are in
# the buffer, then return the entire request.
if ($self->[HEADER]) {
my $buf = $self->[BUFFER];
my $r = $self->[HEADER];
my $cl = $r->content_length() || length($buf) || 0;
# Special handling for chunked requests
if ( $r->header('Transfer-Encoding') =~ /^chunked$/i ) {
return $self->_handle_chunked;
}
# Some browsers (like MSIE 5.01) send extra CRLFs after the
# content. Shame on them. Now we need a special case to drop
# their extra crap.
#
# We use the first $cl octets of the buffer as the request
# content. It's then stripped away. Leading whitespace in
# whatever is left is also stripped away. Any nonspace data left
# over will throw an error.
#
# Four-argument substr() would be ideal here, but it's a
# relatively recent development.
#
# PG- CGI.pm only reads Content-Length: bytes from STDIN.
if (length($buf) >= $cl) {
$r->content(substr($buf, 0, $cl));
$self->[BUFFER] = substr($buf, $cl);
$self->[BUFFER] =~ s/^\s+//;
# We are sending this back, so won't need it anymore.
$self->[HEADER] = undef;
$self->[FINISH]++;
return [$r];
}
#print "$cl wanted, got " . length($buf) . "\n";
return [];
}
# Headers aren't already received. Short-circuit header parsing:
# don't return anything until we've received a blank line.
return [] unless(
$self->[BUFFER] =~ /(\x0D\x0A?\x0D\x0A?|\x0A\x0D?\x0A\x0D?)/s
);
# Copy the buffer for header parsing, and remove the header block
# from the content buffer.
my $buf = $self->[BUFFER];
$self->[BUFFER] =~ s/.*?(\x0D\x0A?\x0D\x0A?|\x0A\x0D?\x0A\x0D?)//s;
# Parse the request line.
if ($buf !~ s/^(\w+)[ \t]+(\S+)(?:[ \t]+(HTTP\/\d+\.\d+))?[^\012]*\012//) {
return [
$self->_build_error(RC_BAD_REQUEST, "Request line parse failure.")
];
}
my $proto = $3 || "HTTP/0.9";
# Use the request line to create a request object.
my $r = HTTP::Request->new($1, URI->new($2));
$r->protocol($proto);
$self->[CLIENT_PROTO] = $proto = _http_version($proto);
# Add the raw request's headers to the request object we'll be
# returning.
if ($proto >= $HTTP_1_0) {
my ($key,$val);
HEADER: while ($buf =~ s/^([^\012]*)\012//) {
$_ = $1;
s/\015$//;
if (/^([\w\-~]+)\s*:\s*(.*)/) {
$r->push_header($key, $val) if $key;
($key, $val) = ($1, $2);
}
elsif (/^\s+(.*)/) {
$val .= " $1";
}
else {
last HEADER;
}
}
$r->push_header($key,$val) if($key);
}
$self->[HEADER] = $r;
# If this is a GET or HEAD request, we won't be expecting a message
# body. Finish up.
my $method = $r->method();
if ($self->[HEADER_ONLY] || ( $method eq 'GET' or $method eq 'HEAD')) {
$self->[FINISH]++;
# We are sending this back, so won't need it anymore.
$self->[HEADER] = undef;
return [$r];
}
# However, if it's any other type of request, check whether the
# entire content has already been received! If so, add that to the
# request and we're done. Otherwise we'll expect a subsequent get()
# call to finish things up.
#print "post:$buf:\END BUFFER\n";
#print length($buf)."-".$r->content_length()."\n";
# Special handling for chunked requests
if ( $r->header('Transfer-Encoding') =~ /^chunked$/i ) {
return $self->_handle_chunked;
}
my $cl = $r->content_length();
unless(defined $cl) {
if($self->[CLIENT_PROTO] == 9) {
return [
$self->_build_error(
RC_BAD_REQUEST,
"POST request detected in an HTTP 0.9 transaction. " .
"POST is not a valid HTTP 0.9 transaction type. " .
"Please verify your HTTP version and transaction content."
)
];
}
elsif ($method eq 'OPTIONS' || $method eq 'CONNECT') {
$self->[FINISH]++;
# OPTIONS requests can have an optional content length
# See http://www.faqs.org/rfcs/rfc2616.html, section 9.2
$self->[HEADER] = undef;
return [$r];
}
elsif ($self->[EXTRA_METHODS]->{$method}) {
$self->[FINISH]++;
$self->[HEADER] = undef;
return [$r];
}
else {
return [
$self->_build_error(RC_LENGTH_REQUIRED, "No content length found.")
];
}
}
unless ($cl =~ /^\d+$/) {
return [
$self->_build_error(
RC_BAD_REQUEST,
"Content length contains non-digits."
)
];
}
if (length($buf) >= $cl) {
$r->content(substr($buf, 0, $cl));
$self->[BUFFER] = substr($buf, $cl);
$self->[BUFFER] =~ s/^\s+//;
$self->[FINISH]++;
# We are sending this back, so won't need it anymore.
$self->[HEADER] = undef;
return [$r];
}
return [];
}
#------------------------------------------------------------------------------
sub put {
my ($self, $responses) = @_;
my @raw;
# HTTP::Response's as_string method returns the header lines
# terminated by "\n", which does not do the right thing if we want
# to send it to a client. Here I've stolen HTTP::Response's
# as_string's code and altered it to use network newlines so picky
# browsers like lynx get what they expect.
foreach (@$responses) {
# XXX stream the non response data?
next unless ( ref( $_ ) );
if ( $self->[CHUNKED_RES] ) {
# send just the next chunk
my $chunk = $_->content;
push @raw,
sprintf( "%X", length $chunk ) . "\x0D\x0A"
. $chunk . "\x0D\x0A";
# If we're closing the connection, send an empty chunk
if ( $_->header('Connection') =~ m/^close$/i ) {
push @raw, '0' . "\x0D\x0A";
$self->[CHUNKED_RES] = 0;
}
# XXX: trailing header support
}
else {
if ( my $te = $_->header('Transfer-Encoding') ) {
if ( $te =~ m/^chunked$/i ) {
$self->[CHUNKED_RES] = 1;
$_->remove_header('Content-Length');
}
}
my $code = $_->code;
my $status_message = status_message($code) || "Unknown Error";
my $message = $_->message || "";
my $proto = $_->protocol || 'HTTP/1.0';
my $status_line = "$proto $code";
$status_line .= " ($status_message)" if $status_message ne $message;
$status_line .= " $message" if length($message);
# Use network newlines, and be sure not to mangle newlines in the
# response's content.
my @headers;
push @headers, $status_line;
push @headers, $_->headers_as_string("\x0D\x0A");
if ( $self->[CHUNKED_RES] ) {
my $chunk = $_->content;
push @raw,
join("\x0D\x0A", @headers, "")
. sprintf( "%X", length $chunk ) . "\x0D\x0A"
. $chunk . "\x0D\x0A";
}
else {
push @raw, join("\x0D\x0A", @headers, "") . $_->content;
}
}
}
# Allow next request after we're done sending the response.
unless ( $self->[CHUNKED_RES] ) {
$self->[FINISH] = 0;
}
\@raw;
}
#------------------------------------------------------------------------------
sub get_pending {
my $self = shift;
return [ $self->[BUFFER] ];
}
#------------------------------------------------------------------------------
# Functions specific to HTTPD;
#------------------------------------------------------------------------------
# Internal function to parse an HTTP status line and return the HTTP
# protocol version.
sub _http_version {
local($_) = shift;
return 0 unless m,^(?:HTTP/)?(\d+)\.(\d+)$,i;
$1 * 1000 + $2;
}
# Build a basic response, given a status, a content type, and some
# content.
sub _build_basic_response {
my ($self, $content, $content_type, $status) = @_;
# Need to check lengths in octets, not characters.
use bytes;
$content_type ||= 'text/html';
$status ||= RC_OK;
my $response = HTTP::Response->new($status);
$response->push_header( 'Content-Type', $content_type );
$response->push_header( 'Content-Length', length($content) );
$response->content($content);
return $response;
}
sub _build_error {
my($self, $status, $details) = @_;
$status ||= RC_BAD_REQUEST;
$details ||= '';
my $message = status_message($status) || "Unknown Error";
return $self->_build_basic_response(
( "" .
"" .
"Error $status: $message" .
"" .
"" .
"Error $status: $message
" .
"$details
" .
"" .
""
),
"text/html",
$status
);
}
sub _handle_chunked {
my $self = shift;
my $r = $self->[HEADER];
if ( $self->[BUFFER] !~ /0\x0D\x0A\x0D\x0A$/ ) {
return [];
}
my $body = '';
while (1) {
if ($self->[BUFFER] =~ s/^([^\012]*)\012//) {
my $chunk_head = $1;
unless ($chunk_head =~ /^([0-9A-Fa-f]+)/) {
return [
$self->_build_error(RC_BAD_REQUEST, "Bad chunk header $chunk_head")
];
}
my $size = hex($1);
last if $size == 0;
$body .= substr($self->[BUFFER], 0, $size);
substr($self->[BUFFER], 0, $size + 2) = '';
}
}
$r->content($body);
# Pretend it was a normal entity body
$r->remove_header('Transfer-Encoding');
$r->header( 'Content-Length' => length($body) );
$self->[BUFFER] =~ s/^\s+//;
$self->[HEADER] = undef;
$self->[FINISH]++;
return [$r];
}
###############################################################################
1;
__END__
=head1 NAME
POE::Filter::HTTPD - convert stream to HTTP::Request; HTTP::Response to stream
=head1 SYNOPSIS
$httpd = POE::Filter::HTTPD->new();
$arrayref_with_http_response_as_string =
$httpd->put($full_http_response_object);
$arrayref_with_http_request_object =
$line->get($arrayref_of_raw_data_chunks_from_driver);
=head1 DESCRIPTION
The HTTPD filter parses the first HTTP 1.0 request from an incoming
stream into an HTTP::Request object (if the request is good) or an
HTTP::Response object (if the request was malformed). To send a
response, give its put() method a HTTP::Response object.
Here is a sample input handler:
sub got_request {
my ($heap, $request) = @_[HEAP, ARG0];
# The Filter::HTTPD generated a response instead of a request.
# There must have been some kind of error. You could also check
# (ref($request) eq 'HTTP::Response').
if ($request->isa('HTTP::Response')) {
$heap->{wheel}->put($request);
return;
}
# Process the request here.
my $response = HTTP::Response->new(200);
$response->push_header( 'Content-Type', 'text/html' );
$response->content( $request->as_string() );
$heap->{wheel}->put($response);
}
Please see the documentation for HTTP::Request and HTTP::Response.
=head1 PUBLIC FILTER METHODS
Please see POE::Filter.
=head1 CAVEATS
It is possible to generate invalid HTTP using libwww. This is specifically a
problem if you are talking to a Filter::HTTPD driven daemon using libwww. For
example, the following code (taken almost verbatim from the
HTTP::Request::Common documentation) will cause an error in a Filter::HTTPD
daemon:
use HTTP::Request::Common;
use LWP::UserAgent;
my $ua = LWP::UserAgent->new();
$ua->request(POST 'http://some/poe/driven/site', [ foo => 'bar' ]);
By default, HTTP::Request is HTTP version agnostic. It makes no attempt to add
an HTTP version header unless you specifically declare a protocol using
C<< $request->protocol('HTTP/1.0') >>.
According to the HTTP 1.0 RFC (1945), when faced with no HTTP version header,
the parser is to default to HTTP/0.9. Filter::HTTPD follows this convention. In
the transaction detailed above, the Filter::HTTPD based daemon will return a 400
error since POST is not a valid HTTP/0.9 request type.
=head1 Streaming Media
It is perfectly possible to use Filter::HTTPD for streaming output
media. Even if it's not possible to change the input filter from
Filter::HTTPD, by setting the output_filter to Filter::Stream and
omitting any content in the HTTP::Response object.
$wheel->put($response); # Without content, it sends just headers.
$wheel->set_output_filter(POE::Filter::Stream->new());
$wheel->put("Raw content.");
=head1 SEE ALSO
POE::Filter.
The SEE ALSO section in L contains a table of contents covering
the entire POE distribution.
=head1 BUGS
=over 4
=item * Keep-alive is not supported.
=item * The full http 1.0 spec is not supported, specifically DELETE, LINK, and UNLINK.
=back
=head1 AUTHORS & COPYRIGHTS
The HTTPD filter was contributed by Artur Bergman.
Please see L for more information about authors and contributors.
=cut