[HOME]

Path : /usr/bin/
Upload :
Current File : //usr/bin/cloud-run-instances

#!/usr/bin/python
#
#    Copyright (C) 2010 Canonical Ltd.
#
#    Authors: Dustin Kirkland <kirkland@canonical.com>
#             Scott Moser <scott.moser@canonical.com>
#             Clint Byrum <clint.byrum@canonical.com>
#             Tom Ellis <tom.ellis@canonical.com>
#
#    This program is free software: you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation, version 3 of the License.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with this program.  If not, see <http://www.gnu.org/licenses/>.


import os
import string
import sys
import signal
import re
import base64
from optparse import OptionParser
from socket import getaddrinfo
import time
import logging
from paramiko import SSHClient, AutoAddPolicy, AuthenticationException
import paramiko
from subprocess import Popen, PIPE

# Module-level marker string; nothing else in this file refers to it.
finished = "FINISHED"

# cloud-config user-data template used by main() for --launchpad-id.
# The single %s is replaced with the space-joined launchpad ids; inside the
# instance that string is handed to the shell snippet as $1 and passed on to
# ssh-import-id (or to ssh-import-lp-id when ssh-import-id is not on the
# PATH), executed through sudo as the 'ubuntu' user.
CC_IMPORT_SSH = """#cloud-config
runcmd:
 - [ sudo, -Hu, ubuntu, sh, '-c',
     "c=ssh-import-id; which $c >/dev/null || c=ssh-import-lp-id; $c $1",
     "--", "%s" ]
"""


class SafeConnectException(Exception):
    """Custom exception type; it adds nothing to the built-in Exception."""


class Instance(object):
    """Empty attribute holder; main() sets id, hostname, output and child."""


class TemporaryMissingHostKeyPolicy(AutoAddPolicy):
    """Host key policy that only remembers unknown keys in memory.

    Nothing is written to known_hosts; the collected keys are handed out
    by getKeys().
    """

    def __init__(self):
        # the key store has to exist before the base initializer runs
        self._keys = list()
        AutoAddPolicy.__init__(self)

    def missing_host_key(self, client, hostname, key):
        """Record *key*; AutoAddPolicy's own handling is not invoked."""
        self._keys += [key]

    def getKeys(self):
        """Return the list of recorded keys (the live list, not a copy)."""
        return self._keys


class PermanentMissingHostKeyPolicy(TemporaryMissingHostKeyPolicy):
    """ also has the behavior of the parent AutoAddPolicy """

    def missing_host_key(self, client, hostname, key):
        # remember the key in memory exactly like the temporary policy ...
        TemporaryMissingHostKeyPolicy.missing_host_key(
            self, client, hostname, key)
        # ... and additionally let AutoAddPolicy process it
        AutoAddPolicy.missing_host_key(self, client, hostname, key)


class ConsoleFingerprintScanner(object):
    """Poll an instance's console output for its SSH RSA host key fingerprint.

    The console text is obtained by running the external command
    '<provider>-get-console-output <instance_id>'.
    """

    def __init__(self, instance_id, hostname, provider, options, sleeptime=30):
        """Store the scan parameters.

        instance_id -- id handed to the console-output command
        hostname    -- stored on the object; not read by the methods below
        provider    -- prefix of the '<provider>-get-console-output' command
        options     -- option object; only options.sleep_time is read
        sleeptime   -- stored on the object; the polling interval actually
                       used by scan() is options.sleep_time
        """
        self.state = "working"
        self.instance_id = instance_id
        self.hostname = hostname
        self.provider = provider
        self.sleeptime = sleeptime
        self.fingerprint = None
        self.options = options
        self.logger = logging.getLogger('console-scanner(%s)' % instance_id)

    def scan(self):
        """Block until an RSA fingerprint appears in the console output.

        Returns a tuple (bits, fingerprint, keytype), for example
        (2048, 'c7:c8:...:fe', '(RSA)').  Exceptions raised while fetching
        the console output propagate to the caller.
        """
        self.logger.debug('scraping fingerprints for instance_id = %s',
            self.instance_id)
        # No try/except around the loop: the former "except None" clause
        # could not catch anything, so errors always reached the caller.
        while self.fingerprint is None:
            console_data = self.get_console_output()
            toks = self.get_fingerprints_in_console_data(console_data)
            if toks is not None:
                # toks is [bits, fingerprint, hostname, keytype]
                self.fingerprint = (int(toks[0]), toks[1], toks[3])
            else:
                self.logger.debug('sleeping %d seconds',
                    self.options.sleep_time)
                time.sleep(self.options.sleep_time)
        return self.fingerprint

    def get_console_output(self):
        """Run the console-output command and return its stripped lines.

        Raises Exception when the command exits with a non-zero status.
        """
        cmd = '%s-get-console-output' % self.provider
        args = [cmd, self.instance_id]

        self.logger.debug('running %s', args)
        # universal_newlines: read text lines rather than raw bytes
        rconsole = Popen(args, stdout=PIPE, universal_newlines=True)

        ret = []
        try:
            for line in rconsole.stdout:
                ret.append(line.strip())
        finally:
            # always reap the child, even if reading failed
            cmdout = rconsole.wait()

        if cmdout:
            # format the message here; passing the values as extra
            # Exception arguments would leave the placeholders unfilled
            raise Exception('%s failed with return code = %d' % (cmd, cmdout))

        return ret

    def get_fingerprints_in_console_data(self, output):
        """Find the RSA host key entry in the console *output*.

        output -- list of console lines

        Returns the tokens of the first RSA entry found after the BEGIN
        fingerprint marker and before the END marker, e.g.
        ['2048', 'c7:c8:...:fe', 'localhost', '(RSA)'], or None when there
        is no such entry.  Only the first BEGIN marker is examined.
        """
        begin_marker = "-----BEGIN SSH HOST KEY FINGERPRINTS----"
        end_marker = "----END SSH HOST KEY FINGERPRINTS-----"
        i = 0
        while i < len(output):
            if output[i].find(begin_marker) > -1:
                while i < len(output) and output[i].find(end_marker) == -1:
                    self.logger.debug(output[i].strip())
                    # split() instead of split(" "): repeated or leading
                    # blanks must not create empty tokens
                    toks = output[i].split()
                    self.logger.debug(toks)
                    if len(toks) == 5:
                        # rip off "ec2:"
                        toks = toks[1:]
                    if len(toks) == 4 and toks[3] == "(RSA)":
                        self.logger.debug('found %s on line %d', toks, i)
                        return toks
                    i = i + 1
                # the fingerprint section ended without an RSA entry
                break
            i = i + 1
        self.logger.debug(
            'did not find any fingerprints in output! (lines=%d)', i)
        return None


class SshKeyScanner(object):
    """Connect to an instance over SSH, collect its host key fingerprints
    and optionally run a command on it."""

    def __init__(self, instance_id, hostname, options, sleeptime=30):
        """Store the scan parameters.

        instance_id -- used for the logger / paramiko log channel names
        hostname    -- host to connect to (port 22)
        options     -- option object; reads known_hosts, privkey, ssh_user,
                       sleep_time and ssh_run_cmd
        sleeptime   -- stored on the object; the retry delay actually used
                       is options.sleep_time
        """
        self.state = "working"
        self.instance_id = instance_id
        self.hostname = hostname
        self.sleeptime = sleeptime
        self.fingerprint = None
        self.keys = None
        self.options = options
        self.port = 22
        self.logger = logging.getLogger('ssh-key-scanner(%s)' % instance_id)
        self.client = None
        self.connected = False

    def scan(self):
        """Return the first host key fingerprint of the host, or None.

        When at least one fingerprint was collected, self.state becomes
        "finished".  Exceptions from get_fingerprints_for_host() propagate.
        """
        self.logger.debug('getting fingerprints for %s', self.hostname)
        # No try/except here: the former "except None" clause could not
        # catch anything, so errors always reached the caller.
        fingerprints = self.get_fingerprints_for_host()
        self.logger.debug('fingerprints = %s', fingerprints)
        if fingerprints:
            self.state = "finished"
            self.fingerprint = fingerprints[0]
        return self.fingerprint

    def get_fingerprints_for_host(self):
        """Connect with paramiko and return the host key fingerprints.

        Returns a list of (bits, fingerprint, keytype) tuples such as
        (2048, 'c7:c8:1d:...:fe', '(RSA)'); keytype is '(RSA)' or '(DSA)'.

        An authentication failure is logged and tolerated (the host keys
        are still collected, but self.connected stays False).  Any other
        connection error is retried; after more than 5 failures an
        Exception is raised.  An Exception is also raised for a host key
        of an unsupported type.
        """
        # use paramiko here
        self.client = SSHClient()
        client = self.client
        client.set_log_channel('ssh-key-scanner(%s)' % self.instance_id)

        if self.options.known_hosts is not None:
            policy = PermanentMissingHostKeyPolicy()
            # This step ensures we save the keys, otherwise that step will
            # be skipped in AutoAddPolicy.missing_host_key
            for path in self.options.known_hosts:
                if not os.path.isfile(path):
                    # create the missing file empty; append mode so that a
                    # file created in the meantime is never truncated
                    with open(path, "a"):
                        pass
                client.load_host_keys(path)
        else:
            policy = TemporaryMissingHostKeyPolicy()
        client.set_missing_host_key_policy(policy)

        pkey = None
        if self.options.privkey is not None:
            # TODO support password protected key file
            pkey = paramiko.RSAKey.from_private_key_file(self.options.privkey)

        retries = 0
        while True:
            try:
                client.connect(self.hostname, self.port,
                    username=self.options.ssh_user, pkey=pkey)
                self.connected = True
                break
            except AuthenticationException as message:
                # the host key exchange already happened, which is all the
                # fingerprint scan needs
                self.logger.warning('auth failed (non fatal) %s', message)
                break
            except Exception as e:
                retries += 1
                if retries > 5:
                    raise Exception('gave up after retrying ssh %d times' %
                                    retries)
                self.logger.info(e)
                self.logger.debug('retry #%d... sleeping %d seconds..',
                    retries, self.options.sleep_time)
                time.sleep(self.options.sleep_time)

        # keys seen by the missing-host-key policy first, then the key the
        # server actually presented on this connection
        allkeys = list(policy.getKeys())
        transport = client.get_transport()
        if transport is not None:
            allkeys.append(transport.get_remote_server_key())

        rlist = []
        for key in allkeys:
            key_cls = type(key)
            # exact type match on purpose: PKey is the common base class,
            # so isinstance(key, PKey) would also accept DSS keys
            if key_cls in (paramiko.RSAKey, paramiko.PKey):
                keytype = '(RSA)'
            elif key_cls == paramiko.DSSKey:
                keytype = '(DSA)'
            else:
                raise Exception('Cannot handle type %s == %s' %
                    (key_cls.__name__, key))

            # render the raw digest as colon separated hex pairs
            fp = key.get_fingerprint().encode("hex")
            fp = ':'.join(re.findall('..', fp))
            rlist.append((key.get_bits(), fp, keytype))

        return rlist

    def run_commands(self):
        """Run options.ssh_run_cmd on the host (if set) and close the client.

        The command words are joined with single spaces.  The remote
        stdout is copied to sys.stdout and the remote stderr to sys.stderr.
        Exits the process with status 1 when a command was requested but
        the SSH connection was never established.
        """
        cmd_words = self.options.ssh_run_cmd
        if cmd_words is not None and len(cmd_words):
            if not self.connected:
                self.logger.critical('cannot run command, ssh did not connect')
                sys.exit(1)
            ecmd = ' '.join(cmd_words)
            self.logger.debug('running %s', ecmd)
            inouterr = self.client.exec_command(ecmd)
            self._relay(inouterr[1], sys.stdout)
            self._relay(inouterr[2], sys.stderr)

        if self.connected:
            self.client.close()
            self.connected = False

    def _relay(self, source, sink):
        """Best-effort copy of every line of *source* to *sink*."""
        try:
            for line in source:
                sink.write(line)
        except Exception as e:
            # relaying is best effort: a broken stream must not keep
            # run_commands() from closing the client
            self.logger.debug('error while relaying command output: %s', e)


def get_auto_instance_type(ami_id, provider):
    """Pick an instance type that matches the architecture of *ami_id*.

    Runs '<provider>-describe-images <ami_id>' and inspects the eighth
    tab-separated field of each IMAGE line: 'i386' selects 'm1.small' and
    'x86_64' selects 'm1.large'.  The first IMAGE line with a known
    architecture wins.  Returns 'm1.small' when no such line is found.
    """
    cmd = '%s-describe-images' % provider
    args = [cmd, ami_id]
    logging.debug('running %s', args)
    # universal_newlines: read text lines rather than raw bytes
    rimages = Popen(args, stdout=PIPE, universal_newlines=True)
    deftype = {'i386': 'm1.small', 'x86_64': 'm1.large'}

    try:
        for line in rimages.stdout:
            # hand the line over as an argument so that any '%' in it is
            # logged verbatim
            logging.debug('%s', line.strip())
            parts = line.rstrip('\r\n').split("\t")
            # IMAGE lines that are too short have no architecture field
            if parts[0] != 'IMAGE' or len(parts) < 8:
                continue
            itype = parts[7]
            if itype in deftype:
                logging.info('auto instance type = %s', deftype[itype])
                return deftype[itype]
    finally:
        # always reap the child; its exit status is not evaluated
        rimages.wait()

    logging.warning('ami not found, returning default m1.small')
    return "m1.small"


def timeout_handler(signum, frame):
    """Signal handler: log the timeout and end the process with status 1."""
    logging.critical('timeout reached, exiting')
    raise SystemExit(1)


def handle_runargs(option, opt_str, value, parser):
    """optparse callback that accumulates pass-through arguments.

    *value* is split on the delimiter currently stored in
    parser.values.runargs_delim (None splits on whitespace) and the pieces
    are appended to the list kept in parser.values.runargs, which is
    created on first use.
    """
    values = parser.values
    collected = getattr(values, "runargs", [])
    if collected is None:
        collected = []
    separator = getattr(values, "runargs_delim", None)
    for piece in value.split(separator):
        collected.append(piece)
    values.runargs = collected


def main():
    """Command line entry point.

    Steps, in order:
      1. parse the options and configure logging (or silence all output
         with --quiet);
      2. unless --instance-ids or --all-instances is given, start instances
         of the AMI named by the first positional argument with
         '<provider>-run-instances';
      3. poll '<provider>-describe-instances'; instances that are not yet
         running are polled again only when --wait-for or --attach-volume
         is in effect;
      4. optionally associate elastic IPs and attach EBS volumes;
      5. with --wait-for=ssh, fork one child per instance which collects
         the SSH host key, optionally verifies it against the console
         output and runs the --ssh-run-cmd command;
      6. optionally terminate the remaining instances (--teardown).

    A SIGALRM timeout of 600 seconds is armed before polling starts.
    Raises Exception when a provider command fails or no instance was
    started.
    """
    parser = OptionParser(
        usage="usage: %prog [options] ids|(-- raw args for provider scripts)")
    parser.add_option("-t", "--instance-type", dest="inst_type",
        help="instance type", metavar="TYPE",
        default="auto")
    parser.add_option("-k", "--key", dest="keypair_name",
        help="keypair name", metavar="TYPE",
        default="auto")
    parser.add_option("-n", "--instance-count", dest="count",
        help="instance count", metavar="TYPE", type="int",
        default=1)
    parser.add_option("", "--ssh-privkey", dest="privkey",
        help="private key to connect with (ssh -i)", metavar="id_rsa",
        default=None)
    parser.add_option("", "--ssh-pubkey", dest="pubkey",
        help="public key to insert into image)", metavar="id_rsa.pub",
        default=None)
    parser.add_option("", "--ssh-run-cmd", dest="ssh_run_cmd",
        action="append", nargs=0,
        help="run this command when ssh'ing", default=None)
    parser.add_option("", "--ssh-user", dest="ssh_user",
        help="connect with ssh as user", default=None)
    parser.add_option("", "--associate-ip", dest="ip",
        help="associate elastic IP with instance", metavar="IP_ADDR",
        default=None)
    parser.add_option("", "--attach-volume", dest="vol",
        help="attach EBS volume with instance", metavar="VOLUME_ID",
        default=None)
    parser.add_option("", "--known-hosts", dest="known_hosts", action="append",
        metavar="KnownHosts", default=None,
        help="write host keys to specified known_hosts file. "
             "Specify multiple times to read keys from multiple files "
             "(only updates last one)")
    parser.add_option("-l", "--launchpad-id", dest="launchpad_id",
        action="append", metavar="lpid", default=None,
        help="launchpad ids to pull SSH keys from "
             "(multiple times adds to the list)")
    parser.add_option("-i", "--instance-ids", dest="instance_ids",
        action="store_true", default=False,
        help="expect instance ids instead of ami ids,"
             "skips -run-instances")
    parser.add_option("", "--all-instances", dest="all_instances",
        action="store_true", default=False,
        help="query all instances already defined "
             "(running/pending/terminated/etc)")
    parser.add_option("", "--run-args", dest="runargs", action="callback",
        callback=handle_runargs, type="string",
         help="pass option through to run-instances")
    parser.add_option("", "--run-args-delim", dest="runargs_delim",
        help="split run-args options with delimiter",
        default=None)
    parser.add_option("", "--verify-ssh", dest="verify_ssh",
        action="store_true",
        help="verify SSH keys against console output (implies --wait-for=ssh)",
        default=False)
    parser.add_option("", "--wait-for", dest="wait_for",
        help="wait for one of: ssh , running", default=None)
    parser.add_option("-p", "--provider", dest="provider",
        help="either euca or ec2", default=None)
    parser.add_option("-v", "--verbose", action="count", dest="loglevel",
        help="increase logging level", default=3)
    parser.add_option("-q", "--quiet", action="store_true", dest="quiet",
        help="produce no output or error messages", default=False)
    # typed option: without a type the command line value stays a string,
    # which time.sleep() and the '%d' log formats reject
    parser.add_option("", "--sleep-time", dest="sleep_time", type="float",
        help="seconds to sleep between polling", default=2)
    parser.add_option("", "--teardown", dest="teardown", action="store_true",
        help="terminate instances at the end", default=False)

    (options, args) = parser.parse_args()

    if (os.path.basename(sys.argv[0]).startswith("uec") and
        os.getenv("CLOUD_UTILS_WARN_UEC", "0") == "0"):
        sys.stderr.write("WARNING: '%s' is now 'cloud-run-instances'. %s\n" %
            (os.path.basename(sys.argv[0]), "Please update tools or docs"))

    if len(args) < 1 and not options.all_instances:
        parser.error('you must pass at least one ami ID')

    # loglevel should be *reduced* every time -v is passed,
    # see logging docs for more
    if options.quiet:
        sys.stderr = open(os.devnull, 'w')
        sys.stdout = sys.stderr
    else:
        loglevel = 6 - options.loglevel
        if loglevel < 1:
            loglevel = 1
        # logging module levels are 0,10,20,30 ...
        loglevel = loglevel * 10

        logging.basicConfig(level=loglevel,
            format="%(asctime)s %(name)s/%(levelname)s: %(message)s",
            stream=sys.stderr)

        logging.debug("loglevel = %d", loglevel)

    provider = options.provider
    if options.provider is None:
        provider = os.getenv('EC2PRE', 'euca')

    # --ssh-run-cmd consumes no value itself (nargs=0), so a single use
    # leaves [()] behind; the command is then taken from the positional
    # arguments.  options.ssh_run_cmd is the *same list object* as args
    # from here on, so removing the AMI id from args further down removes
    # it from the command as well.
    if options.ssh_run_cmd == [()]:
        options.ssh_run_cmd = args

    if options.known_hosts is None:
        options.known_hosts = [os.path.expanduser('~/.ssh/known_hosts')]

    if options.known_hosts is not None and len(options.known_hosts):
        path = None
        for path in options.known_hosts:
            if not os.access(path, os.R_OK):
                logging.warning('known_hosts file %s is not readable!', path)
        # paramiko writes to the last one
        if not os.access(path, os.W_OK):
            logging.critical('known_hosts file %s is not writable!', path)

    logging.debug("provider = %s", provider)

    logging.debug("instance type is %s", options.inst_type)

    if options.instance_ids or options.all_instances:

        if options.all_instances:
            # a single empty id means "describe everything"
            pending_instance_ids = ['']
        else:
            pending_instance_ids = args

    else:

        if len(args) < 1:
            raise Exception('you must pass at least one AMI ID')

        ami_id = args[0]
        del(args[0])

        logging.debug("ami_id = %s", ami_id)

        if options.inst_type == "auto":
            options.inst_type = get_auto_instance_type(ami_id, provider)

        pending_instance_ids = []

        cmd = '%s-run-instances' % provider

        run_inst_args = [cmd]

        # these variables pass through to run-instances
        run_inst_pt = {
            "instance-count": options.count,
            "instance-type": options.inst_type,
            "key": options.keypair_name,
             }

        for key, val in run_inst_pt.items():
            # skip options without a value; the option names themselves
            # are constants and never empty
            if val is not None and val != "":
                run_inst_args.append("--%s=%s" % (key, val))

        if options.launchpad_id:
            run_inst_args.append('--user-data')
            run_inst_args.append(CC_IMPORT_SSH %
                ' '.join(options.launchpad_id))

        if options.runargs is not None:
            run_inst_args.extend(options.runargs)

        run_inst_args.append(ami_id)

        # run-instances with pass through args
        logging.debug("executing %s", run_inst_args)
        logging.info("starting instances with ami_id = %s", ami_id)

        rinstances = Popen(run_inst_args, stdout=PIPE,
            universal_newlines=True)
        #INSTANCE    i-32697259    ami-2d4aa444            pending\
        #    0        m1.small    2010-06-18T18:28:21+0000\
        #    us-east-1b    aki-754aa41c            \
        #    monitoring-disabled                    instance-store
        try:
            for line in rinstances.stdout:
                # pass the line as an argument so that any '%' in it is
                # logged verbatim
                logging.debug('%s', line.strip())
                parts = line.split("\t")
                if parts[0] == 'INSTANCE':
                    pending_instance_ids.append(parts[1])
        finally:
            rcode = rinstances.wait()

        logging.debug("command returned %d", rcode)
        logging.info("instances started: %s", pending_instance_ids)

        if bool(rcode):
            raise Exception('%s failed' % cmd)

    if len(pending_instance_ids) < 1:
        raise Exception('no instances were started!')

    cmd = '%s-describe-instances' % provider

    instances = []

    timeout_date = time.time() + 600

    signal.signal(signal.SIGALRM, timeout_handler)
    signal.alarm(600)

    logging.debug("timeout at %s", time.ctime(timeout_date))

    # We must wait for ssh to run commands
    if options.verify_ssh and not options.wait_for == 'ssh':
        logging.info('--verify-ssh implies --wait-for=ssh')
        options.wait_for = 'ssh'

    if options.ssh_run_cmd and not options.wait_for == 'ssh':
        logging.info('--ssh-run-cmd implies --wait-for=ssh')
        options.wait_for = 'ssh'

    while len(pending_instance_ids):
        new_pending_instance_ids = []
        describe_inst_args = [cmd]

        # remove '', confuses underlying commands
        pids = [iid for iid in pending_instance_ids if len(iid)]
        describe_inst_args.extend(pids)

        logging.debug('running %s', describe_inst_args)
        rdescribe = Popen(describe_inst_args, stdout=PIPE,
            universal_newlines=True)
        try:
            for line in rdescribe.stdout:
                logging.debug('%s', line.strip())
                parts = line.split("\t")
                if parts[0] == 'INSTANCE':
                    iid = parts[1]
                    istatus = parts[5]
                    if istatus == 'terminated':
                        logging.debug('%s is terminated, ignoring...', iid)
                    elif istatus != 'running' and options.wait_for:
                        logging.warning('%s is %s', iid, istatus)
                        new_pending_instance_ids.append(iid)
                    elif istatus != 'running' and options.vol:
                        logging.warning('%s is %s', iid, istatus)
                        new_pending_instance_ids.append(iid)
                    else:
                        logging.info("%s %s", iid, istatus)
                        inst = Instance()
                        inst.id = iid
                        inst.hostname = parts[3]
                        inst.output = line
                        instances.append(inst)
        finally:
            rcode = rdescribe.wait()

        pending_instance_ids = new_pending_instance_ids

        logging.debug("command returned %d", rcode)
        logging.debug("pending instances: %s", pending_instance_ids)

        if bool(rcode):
            raise Exception('%s failed' % cmd)

        if len(pending_instance_ids):
            logging.debug('sleeping %d seconds', options.sleep_time)
            time.sleep(options.sleep_time)

    if options.ip:
        ips = options.ip.split(',')
        if len(ips) < len(instances):
            logging.warning(
                'only %d ips given, some instances will not get an ip',
                len(ips))
        elif len(ips) > len(instances):
            logging.warning('%d ips given, some ips will not be associated',
                            len(ips))

        rcmds = []
        # reversed so that pop() hands the ips out in the order given
        ips.reverse()
        for inst in instances:
            cmd = '%s-associate-address' % provider
            if len(ips) < 1:
                break
            ip = ips.pop()
            aargs = [cmd, '-i', inst.id, ip]
            logging.debug('running %s', aargs)
            rassociate = Popen(aargs, stdout=PIPE, universal_newlines=True)
            rcmds.append(rassociate)
        for rcmd in rcmds:
            # log the output of each associate command
            try:
                for line in rcmd.stdout:
                    logging.info(line)
            finally:
                ret = rcmd.wait()
                if bool(ret):
                    logging.debug('associate-ip returned %d', ret)

    if options.vol:
        # as you can start multiple instances, support multiple vols like ips,
        # instead of multiple volumes on one instance
        vols = options.vol.split(',')
        if len(vols) < len(instances):
            logging.warning('only %d volumes given, some instances will not'
                ' get a volume attached', len(vols))
        elif len(vols) > len(instances):
            logging.warning(
                '%d volumes given, some volumes will not be associated',
                len(vols))

        rcmds = []
        # reversed so that pop() hands the volumes out in the order given
        vols.reverse()
        for inst in instances:
            # instance needs to be 'running' not 'pending' before attaching
            # volume, otherwise it fails
            logging.info('waiting for instance to run')
            cmd = '%s-attach-volume' % provider
            if len(vols) < 1:
                break
            vol = vols.pop()
            dev = '/dev/sdb'
            vargs = [cmd, '-i', inst.id, '-d', dev, vol]
            logging.debug('running %s', vargs)
            logging.info("attaching volume with id = %s to instance id = %s",
                          vol, inst.id)
            rattach = Popen(vargs, stdout=PIPE, universal_newlines=True)
            rcmds.append(rattach)
        for rcmd in rcmds:
            # log the output of each attach command
            try:
                for line in rcmd.stdout:
                    logging.info(line)
            finally:
                ret = rcmd.wait()
                if bool(ret):
                    logging.debug('attach-volume returned %d', ret)

    if options.wait_for == 'ssh':
        logging.info('waiting for ssh access')
        for inst in instances:
            pid = os.fork()
            if pid == 0:
                # child: handle exactly one instance, then exit
                ssh_key_scan = SshKeyScanner(inst.id, inst.hostname, options)
                ssh_fingerprint = ssh_key_scan.scan()
                if options.verify_ssh:
                    # For ec2, it can take 3.5 minutes or more to get console
                    # output, do this last, and only if we have to.
                    cons_fp_scan = ConsoleFingerprintScanner(inst.id,
                        inst.hostname, provider, options)
                    console_fingerprint = cons_fp_scan.scan()

                    if console_fingerprint == ssh_fingerprint:
                        logging.debug('fingerprint match made for iid = %s',
                            inst.id)
                    else:
                        fmt = 'fingerprints do not match for iid = %s'
                        raise Exception(fmt % inst.id)
                ssh_key_scan.run_commands()
                raise SystemExit
            else:
                logging.debug('child pid for %s is %d', inst.id, pid)
                inst.child = pid
        logging.info('Waiting for %d children', len(instances))
        final_instances = []

        for inst in instances:
            try:
                (pid, status) = os.waitpid(inst.child, 0)
            except OSError:
                # no exit status is available for this child, so the
                # instance cannot be counted as successful
                logging.critical('%s - %d doesn\'t exist anymore?', inst.id,
                                 inst.child)
                continue
            logging.debug('%d returned status %d', pid, status)
            # keep only the instances whose child exited cleanly
            if not bool(status):
                final_instances.append(inst)
        instances = final_instances

    # If we reach here, all has happened in the expected manner.
    # NOTE(review): the original note at this point said the expected output
    # is "instance-id\tip\n", but nothing is printed below -- confirm whether
    # that output is still wanted.

    final_instance_ids = [inst.id for inst in instances]

    if options.teardown:
        if final_instance_ids:
            terminate = ['%s-terminate-instances' % provider]
            terminate.extend(final_instance_ids)
            logging.debug('running %s', terminate)
            logging.info('terminating instances...')
            rterm = Popen(terminate, stdout=sys.stderr, stderr=sys.stderr)
            rterm.wait()
        else:
            # calling terminate-instances without ids would only fail
            logging.info('no instances to terminate')


# Run main() only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

# vi: ts=4 expandtab