# Copyright (c) 2012-2015, Eucalyptus Systems, Inc.
#
# Permission to use, copy, modify, and/or distribute this software for
# any purpose with or without fee is hereby granted, provided that the
# above copyright notice and this permission notice appear in all copies.
#
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
# OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
from __future__ import absolute_import
import base64
import calendar
import datetime
import email.utils
import hashlib
import hmac
import os
import re
import tempfile
import time
import warnings
import six
import six.moves.urllib_parse as urlparse
from requestbuilder import Arg
from requestbuilder.auth import BaseAuth
from requestbuilder.exceptions import AuthError
ISO8601 = '%Y-%m-%dT%H:%M:%SZ'
ISO8601_BASIC = '%Y%m%dT%H%M%SZ'
class HmacKeyAuth(BaseAuth):
'''
Basis for AWS HMAC-based authentication
'''
ARGS = [Arg('-I', '--access-key-id', dest='key_id', metavar='KEY_ID'),
Arg('-S', '--secret-key', dest='secret_key', metavar='KEY'),
Arg('--security-token', dest='security_token', metavar='TOKEN')]
@classmethod
def from_other(cls, other, **kwargs):
kwargs.setdefault('loglevel', other.log.level)
kwargs.setdefault('key_id', other.args.get('key_id'))
kwargs.setdefault('secret_key', other.args.get('secret_key'))
kwargs.setdefault('security_token', other.args.get('security_token'))
kwargs.setdefault('credential_expiration',
other.args.get('credential_expiration'))
new = cls(other.config, **kwargs)
new.configure()
return new
def configure(self):
self.__populate_auth_args()
if not self.args.get('key_id'):
raise AuthError('missing access key ID; please supply one with -I')
if not self.args.get('secret_key'):
raise AuthError('missing secret key; please supply one with -S')
if self.args.get('credential_expiration'):
expiration = None
for fmt in ('%Y-%m-%dT%H:%M:%S.%fZ', '%Y-%m-%dT%H:%M:%SZ'):
try:
expiration = datetime.datetime.strptime(
self.args['credential_expiration'], fmt)
break
except ValueError:
continue
else:
self.log.warn(
'failed to parse credential expiration time '
'\'{0}\'; proceeding without validation'
.format(self.args['credential_expiration']))
if expiration and expiration < datetime.datetime.utcnow():
raise AuthError('credentials have expired')
def configure_from_aws_credential_file(self):
if 'AWS_CREDENTIAL_FILE' in os.environ:
path = os.getenv('AWS_CREDENTIAL_FILE')
path = os.path.expandvars(path)
path = os.path.expanduser(path)
with open(path) as credfile:
for line in credfile:
line = line.split('#', 1)[0]
if '=' in line:
(key, val) = line.split('=', 1)
if (key.strip() == 'AWSAccessKeyId' and
not self.args.get('key_id')):
# There's probably a better way to do this, but it
# seems to work for me. Patches are welcome. :)
self.args['key_id'] = val.strip()
elif (key.strip() == 'AWSSecretKey' and
not self.args.get('secret_key')):
self.args['secret_key'] = val.strip()
return path
def __populate_auth_args(self):
"""
Try to get auth info from each source in turn until one provides
both a key ID and a secret key. After each time a source fails
to provide enough info we wipe self.args out so we don't wind up
mixing info from multiple sources.
"""
# self.args gets highest precedence
if self.args.get('key_id') and not self.args.get('secret_key'):
# __reset_unless_ready will wipe out key_id and result in
# the wrong error message
raise AuthError('missing secret key; please supply one with -S')
if self.args.get('secret_key') and not self.args.get('key_id'):
# If only one is supplied at the command line we should
# immediately blow up
raise AuthError('missing access key ID; please supply one with -I')
if self.__reset_unless_ready():
self.log.debug('using auth info provided directly')
return
# Environment comes next
self.args['key_id'] = (os.getenv('AWS_ACCESS_KEY_ID') or
os.getenv('AWS_ACCESS_KEY'))
self.args['secret_key'] = (os.getenv('AWS_SECRET_ACCESS_KEY') or
os.getenv('AWS_SECRET_KEY'))
self.args['security_token'] = os.getenv('AWS_SECURITY_TOKEN')
self.args['credential_expiration'] = \
os.getenv('AWS_CREDENTIAL_EXPIRATION')
if self.__reset_unless_ready():
self.log.debug('using auth info from environment')
return
# See if an AWS credential file was given in the environment
aws_credfile_path = self.configure_from_aws_credential_file()
if aws_credfile_path and self.__reset_unless_ready():
self.log.debug('using auth info from AWS credential file %s',
aws_credfile_path)
return
# Try the config file
self.args['key_id'] = self.config.get_user_option('key-id')
self.args['secret_key'] = self.config.get_user_option('secret-key',
redact=True)
if self.__reset_unless_ready():
self.log.debug('using auth info from configuration')
return
def __reset_unless_ready(self):
"""
If both an access key ID and a secret key are set in self.args
return True. Otherwise, clear auth info from self.args and
return False.
"""
if self.args.get('key_id') and self.args.get('secret_key'):
return True
for arg in ('key_id', 'secret_key', 'security_token',
'credential_expiration'):
self.args[arg] = None
return False
class HmacV1Auth(HmacKeyAuth):
'''
S3 REST authentication
http://docs.aws.amazon.com/AmazonS3/latest/dev/RESTAuthentication.html
'''
# This list comes from the CanonicalizedResource section of the above page
HASHED_PARAMS = set((
'acl', 'lifecycle', 'location', 'logging', 'notification',
'partNumber', 'policy', 'requestPayment', 'torrent', 'uploadId',
'uploads', 'versionId', 'versioning', 'versions', 'website'))
def apply_to_request(self, req, service):
self._update_request_before_signing(req)
c_headers = self.get_canonicalized_headers(req)
c_resource = self.get_canonicalized_resource(req, service)
to_sign = self._get_string_to_sign(req, c_headers, c_resource)
self.log.debug('string to sign: %s', repr(to_sign))
signature = self.sign_string(to_sign.encode('utf-8'))
self.log.debug('b64-encoded signature: %s', signature)
self._apply_signature(req, signature)
return req
def apply_to_request_params(self, req, service, expiration_datetime):
# This does not implement security tokens.
msg = ('S3RestAuth.apply_to_request_params is deprecated; use '
'requestbuilder.auth.aws.QueryHmacV1Auth instead')
self.log.warn(msg)
warnings.warn(msg, DeprecationWarning)
for param in ('AWSAccessKeyId', 'Expires', 'Signature'):
req.params.pop(param, None)
expiration = calendar.timegm(expiration_datetime.utctimetuple())
delta_t = expiration_datetime - datetime.datetime.utcnow()
delta_t_sec = ((delta_t.microseconds +
(delta_t.seconds + delta_t.days * 24 * 3600) * 10**6)
/ 10**6)
self.log.debug('expiration: %i (%f seconds from now)',
expiration, delta_t_sec)
c_headers = self.get_canonicalized_headers(req)
self.log.debug('canonicalized headers: %s', repr(c_headers))
c_resource = self.get_canonicalized_resource(req, service)
self.log.debug('canonicalized resource: %s', repr(c_resource))
to_sign = '\n'.join((req.method,
req.headers.get('Content-MD5', ''),
req.headers.get('Content-Type', ''),
six.text_type(expiration),
c_headers + c_resource))
self.log.debug('string to sign: %s', repr(to_sign))
signature = self.sign_string(to_sign.encode('utf-8'))
self.log.debug('b64-encoded signature: %s', signature)
req.params['AWSAccessKeyId'] = self.args['key_id']
req.params['Expires'] = six.text_type(expiration)
req.params['Signature'] = signature
if self.args.get('security_token'):
# This is a guess. I have no evidence that this actually works.
req.params['SecurityToken'] = self.args['security_token']
def _update_request_before_signing(self, req):
if not req.headers:
req.headers = {}
req.headers['Date'] = email.utils.formatdate()
req.headers['Host'] = urlparse.urlparse(req.url).netloc
if self.args.get('security_token'):
req.headers['x-amz-security-token'] = self.args['security_token']
req.headers.pop('Signature', None)
def _get_string_to_sign(self, req, c_headers, c_resource):
return '\n'.join((req.method.upper(),
req.headers.get('Content-MD5', ''),
req.headers.get('Content-Type', ''),
req.headers.get('Date'),
c_headers + c_resource))
def _apply_signature(self, req, signature):
req.headers['Authorization'] = 'AWS {0}:{1}'.format(
self.args['key_id'], signature)
def get_canonicalized_resource(self, req, service):
# /bucket/keyname
parsed_req_path = urlparse.urlparse(req.url).path
assert service.endpoint is not None
parsed_svc_path = urlparse.urlparse(service.endpoint).path
# IMPORTANT: this only supports path-style requests
assert parsed_req_path.startswith(parsed_svc_path)
resource = parsed_req_path[len(parsed_svc_path):]
if parsed_svc_path.endswith('/'):
# The leading / got stripped off
resource = '/' + resource
if not resource:
# This resource does not address a bucket
resource = '/'
# Now append sub-resources, a.k.a. query string parameters
if getattr(req, 'params', None):
# A regular Request
params = req.params
else:
# A PreparedRequest
params = _get_params_from_url(req.url)
if params:
subresources = []
for key, val in sorted(params.iteritems()):
if key in self.HASHED_PARAMS:
if val is None:
subresources.append(key)
else:
subresources.append(key + '=' + val)
if subresources:
resource += '?' + '&'.join(subresources)
self.log.debug('canonicalized resource: %s', repr(resource))
return resource
def get_canonicalized_headers(self, req):
headers_dict = {}
for key, val in req.headers.iteritems():
if key.lower().startswith('x-amz-'):
headers_dict.setdefault(key.lower(), [])
headers_dict[key.lower()].append(' '.join(val.split()))
headers_strs = []
for key, vals in sorted(headers_dict.iteritems()):
headers_strs.append('{0}:{1}'.format(key, ','.join(vals)))
if headers_strs:
c_headers = '\n'.join(headers_strs) + '\n'
else:
c_headers = ''
self.log.debug('canonicalized headers: %s', repr(c_headers))
return c_headers
def sign_string(self, to_sign):
req_hmac = hmac.new(self.args['secret_key'], digestmod=hashlib.sha1)
req_hmac.update(to_sign)
return base64.b64encode(req_hmac.digest())
class QueryHmacV1Auth(HmacV1Auth):
DEFAULT_TIMEOUT = 600 # 10 minutes
def _update_request_before_signing(self, req):
timeout = int(self.args.get('timeout')) or self.DEFAULT_TIMEOUT
assert timeout > 0
params = _get_params_from_url(req.url)
params['AWSAccessKeyId'] = self.args['key_id']
params['Expires'] = int(time.time() + timeout)
params.pop('Signature', None)
req.prepare_url(_remove_params_from_url(req.url), params)
def _get_string_to_sign(self, req, c_headers, c_resource):
params = _get_params_from_url(req.url)
return '\n'.join((req.method.upper(),
req.headers.get('Content-MD5', ''),
req.headers.get('Content-Type', ''),
params['Expires'],
c_headers + c_resource))
def _apply_signature(self, req, signature):
req.prepare_url(req.url, {'Signature': signature})
class QueryHmacV2Auth(HmacKeyAuth):
'''
AWS signature version 2
http://docs.aws.amazon.com/general/latest/gr/signature-version-2.html
'''
def apply_to_request(self, req, service):
parsed = urlparse.urlparse(req.url)
if req.method == 'POST':
# This is probably going to break when given multipart data.
params = urlparse.parse_qs(req.body or '', keep_blank_values=True)
else:
params = urlparse.parse_qs(parsed.query, keep_blank_values=True)
params = dict((key, vals[0]) for key, vals in params.iteritems())
params['AWSAccessKeyId'] = self.args['key_id']
params['SignatureVersion'] = 2
params['SignatureMethod'] = 'HmacSHA256'
params['Timestamp'] = time.strftime(ISO8601, time.gmtime())
if self.args.get('security_token'):
params['SecurityToken'] = self.args['security_token']
# Needed for retries so old signatures aren't included in to_sign
params.pop('Signature', None)
to_sign = '{method}\n{host}\n{path}\n'.format(
method=req.method, host=parsed.netloc.lower(),
path=(parsed.path or '/'))
quoted_params = []
for key in sorted(params):
val = six.text_type(params[key])
quoted_params.append(urlparse.quote(key, safe='') + '=' +
urlparse.quote(val, safe='-_~'))
query_string = '&'.join(quoted_params)
to_sign += query_string
# Redact passwords
redacted_to_sign = re.sub('assword=[^&]*', 'assword=<redacted>',
to_sign)
self.log.debug('string to sign: %s', repr(redacted_to_sign))
signature = self.sign_string(to_sign)
self.log.debug('b64-encoded signature: %s', signature)
params['Signature'] = signature
if req.method == 'POST':
req.prepare_body(params, {})
else:
req.prepare_url(_remove_params_from_url(req.url), params)
return req
def sign_string(self, to_sign):
req_hmac = hmac.new(self.args['secret_key'], digestmod=hashlib.sha256)
req_hmac.update(to_sign)
return base64.b64encode(req_hmac.digest())
class HmacV4Auth(HmacKeyAuth):
"""
AWS signature version 4
http://docs.aws.amazon.com/general/latest/gr/signature-version-4.html
"""
def apply_to_request(self, req, service):
if not service.NAME:
self.log.critical('service class %s must have a NAME attribute '
'to use sigv4', service.__class__.__name__)
raise AuthError('BUG: service class {0} does not have a name'
.format(service.__class__.__name__))
payload_hash = self._hash_payload(req) # large files will be slow here
now = time.time()
date_header = time.strftime(ISO8601_BASIC, time.gmtime(now))
scope = self._build_scope(service, now)
credential = '/'.join((self.args['key_id'],) + scope)
self._update_request_before_signing(req, credential, payload_hash,
date_header)
c_uri = self._get_canonical_uri(req)
c_query = self._get_canonical_query(req)
c_headers = self._get_canonical_headers(req)
s_headers = self._get_signed_headers(req)
c_request = '\n'.join((req.method.upper(), c_uri, c_query, c_headers,
'', s_headers, payload_hash))
self.log.debug('canonical request: %s', repr(c_request))
to_sign = '\n'.join(('AWS4-HMAC-SHA256', date_header, '/'.join(scope),
hashlib.sha256(c_request).hexdigest()))
# Redact passwords
redacted_to_sign = re.sub('assword=[^&]*', 'assword=<redacted>',
to_sign)
self.log.debug('string to sign: %s', repr(redacted_to_sign))
derived_hmac = hmac.new('AWS4{0}'.format(self.args['secret_key']),
digestmod=hashlib.sha256)
for chunk in scope:
derived_hmac.update(chunk)
derived_hmac = hmac.new(derived_hmac.digest(),
digestmod=hashlib.sha256)
derived_hmac.update(to_sign)
signature = derived_hmac.hexdigest()
self.log.debug('signature: %s', signature)
self._apply_signature(req, credential, signature)
return req
def _update_request_before_signing(self, req, credential, payload_sha256,
date_header):
parsed = urlparse.urlparse(req.url)
req.headers['Host'] = parsed.netloc
req.headers.pop('Authorization', None)
req.headers['X-Amz-Content-SHA256'] = payload_sha256
req.headers['X-Amz-Date'] = date_header
if self.args.get('security_token'):
req.headers['X-Amz-Security-Token'] = self.args['security_token']
def _apply_signature(self, req, credential, signature):
auth_header = ', '.join((
'AWS4-HMAC-SHA256 Credential={0}'.format(credential),
'SignedHeaders={0}'.format(self._get_signed_headers(req)),
'Signature={0}'.format(signature)))
req.headers['Authorization'] = auth_header
def _build_scope(self, service, timestamp):
if service.region_name:
region = service.region_name
elif os.getenv('AWS_AUTH_REGION'):
region = os.getenv('AWS_AUTH_REGION')
else:
self.log.error('a region name is required to use sigv4')
raise AuthError(
"region name is required; either use a config file "
"to supply the service's URL or set AWS_AUTH_REGION "
"in the environment")
scope = (time.strftime('%Y%m%d', time.gmtime(timestamp)),
region, service.NAME, 'aws4_request')
self.log.debug('scope: %s', '/'.join(scope))
return scope
def _get_canonical_uri(self, req):
path = urlparse.urlsplit(req.url).path or '/'
# TODO: Normalize stuff like ".."
c_uri = urlparse.quote(path, safe='/~')
self.log.debug('canonical URI: %s', c_uri)
return c_uri
def _get_canonical_query(self, req):
req_params = urlparse.parse_qsl(urlparse.urlparse(req.url).query,
keep_blank_values=True)
params = []
for key, val in sorted(req_params or []):
params.append('='.join((urlparse.quote(key, safe='~-_.'),
urlparse.quote(val, safe='~-_.'))))
c_params = '&'.join(params)
self.log.debug('canonical query: %s', c_params)
return c_params
def _get_normalized_headers(self, req):
# This doesn't currently support multi-value headers.
headers = {}
for key, val in req.headers.iteritems():
if key.lower() not in ('connection', 'user-agent'):
# Reverse proxies like to rewrite Connection headers.
# Ignoring User-Agent lets us generate storable query URLs
headers[key.lower().strip()] = val.strip()
return headers
def _get_canonical_headers(self, req):
headers = []
normalized_headers = self._get_normalized_headers(req)
for key, val in sorted(normalized_headers.items()):
headers.append(':'.join((key, val)))
self.log.debug('canonical headers: %s', str(headers))
return '\n'.join(headers)
def _get_signed_headers(self, req):
normalized_headers = self._get_normalized_headers(req)
s_headers = ';'.join(sorted(normalized_headers))
self.log.debug('signed headers: %s', s_headers)
return s_headers
def _hash_payload(self, req):
if self.args.get('payload_hash'):
return self.args['payload_hash']
digest = hashlib.sha256()
if not req.body:
pass
elif hasattr(req.body, 'seek'):
body_position = req.data.tell()
self.log.debug('payload hashing starting')
while True:
chunk = req.body.read(16384)
if not chunk:
break
digest.update(chunk)
req.body.seek(body_position)
self.log.debug('payload hashing done')
elif hasattr(req.body, 'read'):
self.log.debug('payload spooling/hashing starting')
# 10M happens to be the size of a bundle part, the thing we upload
# most frequently.
spool = tempfile.SpooledTemporaryFile(max_size=(10 * 1024 * 1024))
while True:
chunk = req.body.read(16384)
if not chunk:
break
digest.update(chunk)
spool.write(chunk)
self.log.debug('payload spooling/hashing done')
spool.seek(0)
self.log.info('re-pointing request body at spooled payload')
req.body = spool
# Should we close the original req.body here?
else:
digest.update(req.body)
self.log.debug('payload hash: %s', digest.hexdigest())
return digest.hexdigest()
class QueryHmacV4Auth(HmacV4Auth):
def _update_request_before_signing(self, req, credential, payload_sha256,
date_header):
# We don't do anything with payload_sha256. Is that bad?
if (req.method.upper() == 'POST' and
'form-urlencoded' in req.headers.get('Content-Type', '')):
self.log.warn('Query string authentication and POST form data '
'are generally mutually exclusive; GET is '
'recommended instead')
parsed = urlparse.urlparse(req.url)
req.headers['Host'] = parsed.netloc
req.headers.pop('Authorization', None)
params = {
'X-Amz-Algorithm': 'AWS4-HMAC-SHA256',
'X-Amz-Credential': credential,
'X-Amz-Date': date_header,
'X-Amz-SignedHeaders': self._get_signed_headers(req)}
if self.args.get('timeout'):
params['X-Amz-Expires'] = self.args['timeout']
if self.args.get('security_token'):
params['X-Amz-Security-Token'] = self.args['security_token']
req.prepare_url(req.url, params)
def _apply_signature(self, req, credential, signature):
req.prepare_url(req.url, {'X-Amz-Signature': signature})
def _get_params_from_url(url):
"""
Given a URL, return a dict of parameters and their values. If a
parameter appears more than once all but the first value will be lost.
"""
parsed = urlparse.urlparse(url)
params = urlparse.parse_qs(parsed.query, keep_blank_values=True)
return dict((key, vals[0]) for key, vals in params.iteritems())
def _remove_params_from_url(url):
"""
Return a copy of a URL with its parameters, fragments, and query
string removed.
"""
parsed = urlparse.urlparse(url)
return urlparse.urlunparse((parsed[0], parsed[1], parsed[2], '', '', ''))