"""An NNTP client class based on:
- RFC 977: Network News Transfer Protocol
- RFC 2980: Common NNTP Extensions
- RFC 3977: Network News Transfer Protocol (version 2)
>>> from nntplib import NNTP
>>> resp, count, first, last, name = s.group('comp.lang.python')
>>> print('Group', name, 'has', count, 'articles, range', first, 'to', last)
Group comp.lang.python has 51 articles, range 5770 to 5821
>>> resp, subs = s.xhdr('subject', '{0}-{1}'.format(first, last))
Here 'resp' is the server response line.
Error responses are turned into exceptions.
To post an article from a file:
>>> f = open(filename, 'rb') # file containing article, including header
For descriptions of all methods, read the comments in the code below.
Note that all arguments and return values representing article numbers
are strings, not numbers, since they are rarely used for calculations.
# RFC 977 by Brian Kantor and Phil Lapsley.
# xover, xgtitle, xpath, date methods by Kevan Heydon
# Incompatible changes from the 2.x nntplib:
# - all commands are encoded as UTF-8 data (using the "surrogateescape"
# error handler), except for raw message data (POST, IHAVE)
# - all responses are decoded as UTF-8 data (using the "surrogateescape"
# error handler), except for raw message data (ARTICLE, HEAD, BODY)
# - the `file` argument to various methods is keyword-only
# - NNTP.date() returns a datetime object
# - NNTP.newgroups() and NNTP.newnews() take a datetime (or date) object,
# rather than a pair of (date, time) strings.
# - NNTP.newgroups() and NNTP.list() return a list of GroupInfo named tuples
# - NNTP.descriptions() returns a dict mapping group names to descriptions
# - NNTP.xover() returns a list of dicts mapping field names (header or metadata)
# to field values; each dict representing a message overview.
# - NNTP.article(), NNTP.head() and NNTP.body() return a (response, ArticleInfo)
# - the "internal" methods have been marked private (they now start with
# Other changes from the 2.x/3.1 nntplib:
# - automatic querying of capabilities at connect
# - New method NNTP.getcapabilities()
# - New method NNTP.over()
# - New helper function decode_header()
# - NNTP.post() and NNTP.ihave() accept file objects, bytes-like objects and
# arbitrary iterables yielding lines.
# - An extensive test suite :-)
# - return structured data (GroupInfo etc.) everywhere
from email.header import decode_header as _email_decode_header
from socket import _GLOBAL_DEFAULT_TIMEOUT
"NNTPError", "NNTPReplyError", "NNTPTemporaryError",
"NNTPPermanentError", "NNTPProtocolError", "NNTPDataError",
# maximal line length when calling readline(). This is to prevent
# reading arbitrary length lines. RFC 3977 limits NNTP line length to
# 512 characters, including CRLF. We have selected 2048 just to be on
# Exceptions raised when an error or invalid response is received
class NNTPError(Exception):
"""Base class for all nntplib exceptions"""
def __init__(self, *args):
Exception.__init__(self, *args)
self.response = 'No response given'
class NNTPReplyError(NNTPError):
"""Unexpected [123]xx reply"""
class NNTPTemporaryError(NNTPError):
class NNTPPermanentError(NNTPError):
class NNTPProtocolError(NNTPError):
"""Response does not begin with [1-5]"""
class NNTPDataError(NNTPError):
"""Error in response data"""
# Standard port used by NNTP servers
# Response numbers that are followed by additional text (e.g. article)
'211', # LISTGROUP (also not multi-line with GROUP)
# Default decoded value for LIST OVERVIEW.FMT if not supported
_DEFAULT_OVERVIEW_FMT = [
"subject", "from", "date", "message-id", "references", ":bytes", ":lines"]
# Alternative names allowed in LIST OVERVIEW.FMT response
_OVERVIEW_FMT_ALTERNATIVES = {
# Line terminators (we always output CRLF, but accept any of CRLF, CR, LF)
GroupInfo = collections.namedtuple('GroupInfo',
['group', 'last', 'first', 'flag'])
ArticleInfo = collections.namedtuple('ArticleInfo',
['number', 'message_id', 'lines'])
def decode_header(header_str):
"""Takes a unicode string representing a munged header value
and decodes it as a (possibly non-ASCII) readable value."""
for v, enc in _email_decode_header(header_str):
parts.append(v.decode(enc or 'ascii'))
def _parse_overview_fmt(lines):
"""Parse a list of string representing the response to LIST OVERVIEW.FMT
and return a list of header/metadata names.
Raises NNTPDataError if the response is not compliant
(cf. RFC 3977, section 8.4)."""
# Metadata name (e.g. ":bytes")
name, _, suffix = line[1:].partition(':')
# Header name (e.g. "Subject:" or "Xref:full")
name, _, suffix = line.partition(':')
name = _OVERVIEW_FMT_ALTERNATIVES.get(name, name)
# Should we do something with the suffix?
defaults = _DEFAULT_OVERVIEW_FMT
if len(fmt) < len(defaults):
raise NNTPDataError("LIST OVERVIEW.FMT response too short")
if fmt[:len(defaults)] != defaults:
raise NNTPDataError("LIST OVERVIEW.FMT redefines default fields")
def _parse_overview(lines, fmt, data_process_func=None):
"""Parse the response to an OVER or XOVER command according to the
overview format `fmt`."""
n_defaults = len(_DEFAULT_OVERVIEW_FMT)
article_number, *tokens = line.split('\t')
article_number = int(article_number)
for i, token in enumerate(tokens):
# XXX should we raise an error? Some servers might not
# support LIST OVERVIEW.FMT and still return additional
is_metadata = field_name.startswith(':')
if i >= n_defaults and not is_metadata:
# Non-default header names are included in full in the response
# (unless the field is totally empty)
if token and token[:len(h)].lower() != h:
raise NNTPDataError("OVER/XOVER response doesn't include "
"names of additional headers")
token = token[len(h):] if token else None
overview.append((article_number, fields))
def _parse_datetime(date_str, time_str=None):
"""Parse a pair of (date, time) strings, and return a datetime object.
If only the date is given, it is assumed to be date and time
concatenated together (e.g. response to the DATE command).
hours = int(time_str[:2])
minutes = int(time_str[2:4])
seconds = int(time_str[4:])
year = int(date_str[:-4])
month = int(date_str[-4:-2])
# RFC 3977 doesn't say how to interpret 2-char years. Assume that
# there are no dates before 1970 on Usenet.
return datetime.datetime(year, month, day, hours, minutes, seconds)
def _unparse_datetime(dt, legacy=False):
"""Format a date or datetime object as a pair of (date, time) strings
in the format required by the NEWNEWS and NEWGROUPS commands. If a
date object is passed, the time is assumed to be midnight (00h00).
The returned representation depends on the legacy flag:
* if legacy is False (the default):
date has the YYYYMMDD format and time the HHMMSS format
date has the YYMMDD format and time the HHMMSS format.
RFC 3977 compliant servers should understand both formats; therefore,
legacy is only needed when talking to old servers.
if not isinstance(dt, datetime.datetime):
time_str = "{0.hour:02d}{0.minute:02d}{0.second:02d}".format(dt)
date_str = "{0:02d}{1.month:02d}{1.day:02d}".format(y, dt)
date_str = "{0:04d}{1.month:02d}{1.day:02d}".format(y, dt)
return date_str, time_str
def _encrypt_on(sock, context, hostname):
"""Wrap a socket in SSL/TLS. Arguments:
- context: SSL context to use for the encrypted connection
- sock: New, encrypted socket.
# Generate a default SSL context if none was passed.
context = ssl._create_stdlib_context()
return context.wrap_socket(sock, server_hostname=hostname)
# UTF-8 is the character set for all NNTP commands and responses: they
# are automatically encoded (when sending) and decoded (and receiving)
# However, some multi-line data blocks can contain arbitrary bytes (for
# example, latin-1 or utf-16 data in the body of a message). Commands
# taking (POST, IHAVE) or returning (HEAD, BODY, ARTICLE) raw message
# data will therefore only accept and produce bytes objects.
# Furthermore, since there could be non-compliant servers out there,
# we use 'surrogateescape' as the error handler for fault tolerance
# and easy round-tripping. This could be useful for some applications
errors = 'surrogateescape'
def __init__(self, file, host,
readermode=None, timeout=_GLOBAL_DEFAULT_TIMEOUT):
"""Initialize an instance. Arguments:
- file: file-like object (open for read/write in binary mode)
- host: hostname of the server
- readermode: if true, send 'mode reader' command after
- timeout: timeout (in seconds) used for socket connections
readermode is sometimes necessary if you are connecting to an
NNTP server on the local machine and intend to call
reader-specific commands, such as `group'. If you get
unexpected NNTPPermanentErrors, you might need to set
self.welcome = self._getresp()
# Inquire about capabilities (RFC 3977).
# 'MODE READER' is sometimes necessary to enable 'reader' mode.
# However, the order in which 'MODE READER' and 'AUTHINFO' need to
# arrive differs between some NNTP servers. If _setreadermode() fails
# with an authorization failed error, it will set this to True;
# the login() routine will interpret that as a request to try again
# after performing its normal function.
# Enable only if we're not already in READER mode anyway.
self.readermode_afterauth = False
if readermode and 'READER' not in self._caps:
if not self.readermode_afterauth:
# Capabilities might have changed after MODE READER
# RFC 4642 2.2.2: Both the client and the server MUST know if there is
# a TLS session active. A client MUST NOT attempt to start a TLS
# session if a TLS session is already active.
# Log in and encryption setup order is left to subclasses.
self.authenticated = False
def __exit__(self, *args):
is_connected = lambda: hasattr(self, "file")
except (OSError, EOFError):
"""Get the welcome message from the server
(this is read and squirreled away by __init__()).
If the response code is 200, posting is allowed;
if it 201, posting is not allowed."""
if self.debugging: print('*welcome*', repr(self.welcome))
def getcapabilities(self):
"""Get the server capabilities, as read by __init__().
If the CAPABILITIES command is not supported, an empty dict is
self.nntp_implementation = None
resp, caps = self.capabilities()
except (NNTPPermanentError, NNTPTemporaryError):
# Server doesn't support capabilities
# The server can advertise several supported versions,
self.nntp_version = max(map(int, caps['VERSION']))
if 'IMPLEMENTATION' in caps:
self.nntp_implementation = ' '.join(caps['IMPLEMENTATION'])
def set_debuglevel(self, level):
"""Set the debugging level. Argument 'level' means:
0: no debugging output (default)
1: print commands and responses but not body text etc.
2: also print raw lines read and sent before stripping CR/LF"""
def _putline(self, line):
"""Internal: send one line to the server, appending CRLF.
The `line` must be a bytes-like object."""
sys.audit("nntplib.putline", self, line)
if self.debugging > 1: print('*put*', repr(line))
"""Internal: send one command to the server (through _putline()).
The `line` must be a unicode string."""
if self.debugging: print('*cmd*', repr(line))
line = line.encode(self.encoding, self.errors)
def _getline(self, strip_crlf=True):
"""Internal: return one line from the server, stripping _CRLF.
Raise EOFError if the connection is closed.
Returns a bytes object."""
line = self.file.readline(_MAXLINE +1)
raise NNTPDataError('line too long')
print('*get*', repr(line))
if not line: raise EOFError
"""Internal: get a response from the server.
Raise various errors if the response indicates an error.
Returns a unicode string."""
if self.debugging: print('*resp*', repr(resp))
resp = resp.decode(self.encoding, self.errors)
raise NNTPTemporaryError(resp)
raise NNTPPermanentError(resp)
raise NNTPProtocolError(resp)
def _getlongresp(self, file=None):
"""Internal: get a response plus following text from the server.
Raise various errors if the response indicates an error.
Returns a (response, lines) tuple where `response` is a unicode
string and `lines` is a list of bytes objects.
If `file` is a file-like object, it must be open in binary mode.
# If a string was passed then open a file with that name
if isinstance(file, (str, bytes)):
openedFile = file = open(file, "wb")
if resp[:3] not in _LONGRESP:
raise NNTPReplyError(resp)
# XXX lines = None instead?
terminators = (b'.' + _CRLF, b'.\n')
line = self._getline(False)
if line.startswith(b'..'):
if line.startswith(b'..'):