[HOME]

Path : /lib/python2.7/site-packages/yum/
Upload :
Current File : //lib/python2.7/site-packages/yum/update_md.py

#!/usr/bin/python -t
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Library General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
# Copyright 2005 Duke University 
#
# Seth Vidal <skvidal@linux.duke.edu>
# Luke Macken <lmacken@redhat.com>

"""
Update metadata (updateinfo.xml) parsing.
"""

import sys

from yum.i18n import utf8_text_wrap, to_utf8, to_unicode, _
from yum.yumRepo import YumRepository
from yum.packages import FakeRepository
from yum.misc import to_xml, decompress, repo_gen_decompress
from yum.misc import cElementTree_iterparse as iterparse 
import Errors

import logginglevels

import rpmUtils.miscutils
from rpmUtils.arch import ArchStorage


def safe_iterparse(filename, logger=None):
    """ Works like iterparse, but hides XML errors (prints a warning). """
    try:
        for event, elem in iterparse(filename):
            yield event, elem
    except SyntaxError: # Bad XML
        if logger:
            logger.critical(_("Updateinfo file is not valid XML: %s"), filename)
        else:
            print >> sys.stderr, "Updateinfo file is not valid XML:", filename

class UpdateNoticeException(Exception):
    """ An exception thrown for bad UpdateNotice data. """
    pass


class UpdateNotice(object):

    """
    A single update notice (for instance, a security fix).
    """

    def __init__(self, elem=None, repoid=None, vlogger=None):
        self._md = {
            'from'             : '',
            'type'             : '',
            'title'            : '',
            'release'          : '',
            'status'           : '',
            'version'          : '',
            'pushcount'        : '',
            'update_id'        : '',
            'issued'           : '',
            'updated'          : '',
            'description'      : '',
            'rights'           : '',
            'severity'         : '',
            'summary'          : '',
            'solution'         : '',
            'references'       : [],
            'pkglist'          : [],
            'reboot_suggested' : False
        }

        if elem:
            self._parse(elem)

        self._repoid = repoid
        self._vlogger = vlogger

    def __getitem__(self, item):
        """ Allows scriptable metadata access (ie: un['update_id']). """
        if type(item) is int:
            return sorted(self._md)[item]
        ret = self._md.get(item)
        if ret == '':
            ret = None
        return ret

    def __contains__(self, item):
        """ Allows quick tests for foo in blah. """
        return item in self._md

    def __setitem__(self, item, val):
        self._md[item] = val

    def __eq__(self, other):
        #  Tests to see if it's "the same data", which means that the
        # packages can be different (see add_notice).

        def _rid(un):
            if hasattr(un, '_repoid') and un._repoid is not None:
                return un._repoid
            else:
                return '<unknown>'

        def _log_failure(data):
            """Log the mismatched data similarly to conflict markers in git."""
            if self._vlogger is None:
                return
            msg = _('Duplicate of %s differs in some fields:\n')
            msg %= other._md['update_id']
            msg += '<<<<<<< %s:%s\n' % (_rid(other), data)
            msg += '%r\n=======\n%r\n' % (other._md[data], self._md[data])
            msg += '>>>>>>> %s:%s' % (_rid(self), data)
            # --verbose mode enables this
            self._vlogger.log(logginglevels.DEBUG_3, msg)

        if not other or not hasattr(other, '_md'):
            return False

        for data in ('type', 'update_id', 'status', 'rights',
                     'severity', 'release',
                     'issued', 'updated', 'version', 'pushcount',
                     'from', 'title', 'summary', 'description', 'solution'):
            if data == 'status': # FIXME: See below...
                continue
            if self._md[data] != other._md[data]:
                _log_failure(data)
                return False
        # FIXME: Massive hack, Fedora is really broken and gives status=stable
        # and status=testing for updateinfo notices, just depending on which
        # repo. they come from.
        data = 'status'
        if self._md[data] != other._md[data]:
            if self._md[data]  not in ('stable', 'testing'):
                _log_failure(data)
                return False
            if other._md[data] not in ('stable', 'testing'):
                _log_failure(data)
                return False
            # They are both really "stable" ...
            self._md[data]  = 'stable'
            other._md[data] = 'stable'

        return True

    def __ne__(self, other):
        return not (self == other)

    def text(self, skip_data=('files', 'summary', 'rights', 'solution')):
        head = """
===============================================================================
  %(title)s
===============================================================================
  Update ID : %(update_id)s
    Release : %(release)s
       Type : %(type)s
     Status : %(status)s
     Issued : %(issued)s
""" % self._md

        if self._md['updated'] and self._md['updated'] != self._md['issued']:
            head += "    Updated : %s" % self._md['updated']

        # Add our bugzilla references
        bzs = filter(lambda r: r['type'] == 'bugzilla', self._md['references'])
        if len(bzs) and 'bugs' not in skip_data:
            buglist = "       Bugs :"
            for bz in bzs:
                buglist += " %s%s\n\t    :" % (bz['id'], 'title' in bz
                                               and ' - %s' % bz['title'] or '')
            head += buglist[: - 1].rstrip() + '\n'

        # Add our CVE references
        cves = filter(lambda r: r['type'] == 'cve', self._md['references'])
        if len(cves) and 'cves' not in skip_data:
            cvelist = "       CVEs :"
            for cve in cves:
                cvelist += " %s\n\t    :" % cve['id']
            head += cvelist[: - 1].rstrip() + '\n'

        if self._md['summary'] and 'summary' not in skip_data:
            data = utf8_text_wrap(self._md['summary'], width=64,
                                  subsequent_indent=' ' * 12 + ': ')
            head += "    Summary : %s\n" % '\n'.join(data)

        if self._md['description'] and 'description' not in skip_data:
            desc = utf8_text_wrap(self._md['description'], width=64,
                                  subsequent_indent=' ' * 12 + ': ')
            head += "Description : %s\n" % '\n'.join(desc)

        if self._md['solution'] and 'solution' not in skip_data:
            data = utf8_text_wrap(self._md['solution'], width=64,
                                  subsequent_indent=' ' * 12 + ': ')
            head += "   Solution : %s\n" % '\n'.join(data)

        if self._md['rights'] and 'rights' not in skip_data:
            data = utf8_text_wrap(self._md['rights'], width=64,
                                  subsequent_indent=' ' * 12 + ': ')
            head += "     Rights : %s\n" % '\n'.join(data)

        if self._md['severity'] and 'severity' not in skip_data:
            data = utf8_text_wrap(self._md['severity'], width=64,
                                  subsequent_indent=' ' * 12 + ': ')
            head += "   Severity : %s\n" % '\n'.join(data)

        if 'files' in skip_data:
            return head[:-1] # chop the last '\n'

        #  Get a list of arches we care about:
        #XXX ARCH CHANGE - what happens here if we set the arch - we need to
        # pass this in, perhaps
        arches = set(rpmUtils.arch.getArchList())

        filelist = "      Files :"
        for pkg in self._md['pkglist']:
            for file in pkg['packages']:
                if file['arch'] not in arches:
                    continue
                filelist += " %s\n\t    :" % file['filename']
        head += filelist[: - 1].rstrip()

        return head

    def __str__(self):
        return to_utf8(self.text())
    def __unicode__(self):
        return to_unicode(self.text())

    def get_metadata(self):
        """ Return the metadata dict. """
        return self._md

    def _parse(self, elem):
        """
        Parse an update element::

            <!ELEMENT update (id, synopsis?, issued, updated,
                              references, description, rights?,
                              severity?, summary?, solution?, pkglist)>
                <!ATTLIST update type (errata|security) "errata">
                <!ATTLIST update status (final|testing) "final">
                <!ATTLIST update version CDATA #REQUIRED>
                <!ATTLIST update from CDATA #REQUIRED>
        """
        if elem.tag == 'update':
            for attrib in ('from', 'type', 'status', 'version'):
                self._md[attrib] = elem.attrib.get(attrib)
            for child in elem:
                if child.tag == 'id':
                    if not child.text:
                        raise UpdateNoticeException("No id element found")
                    self._md['update_id'] = child.text
                elif child.tag == 'pushcount':
                    self._md['pushcount'] = child.text
                elif child.tag == 'issued':
                    self._md['issued'] = child.attrib.get('date')
                elif child.tag == 'updated':
                    self._md['updated'] = child.attrib.get('date')
                elif child.tag == 'references':
                    self._parse_references(child)
                elif child.tag == 'description':
                    self._md['description'] = child.text
                elif child.tag == 'rights':
                    self._md['rights'] = child.text
                elif child.tag == 'severity':
                    self._md[child.tag] = child.text
                elif child.tag == 'summary':
                    self._md['summary'] = child.text
                elif child.tag == 'solution':
                    self._md['solution'] = child.text
                elif child.tag == 'pkglist':
                    self._parse_pkglist(child)
                elif child.tag == 'title':
                    self._md['title'] = child.text
                elif child.tag == 'release':
                    self._md['release'] = child.text
        else:
            raise UpdateNoticeException('No update element found')

    def _parse_references(self, elem):
        """
        Parse the update references::

            <!ELEMENT references (reference*)>
            <!ELEMENT reference>
                <!ATTLIST reference href CDATA #REQUIRED>
                <!ATTLIST reference type (self|other|cve|bugzilla) "self">
                <!ATTLIST reference id CDATA #IMPLIED>
                <!ATTLIST reference title CDATA #IMPLIED>
        """
        for reference in elem:
            if reference.tag == 'reference':
                data = {}
                for refattrib in ('id', 'href', 'type', 'title'):
                    data[refattrib] = reference.attrib.get(refattrib)
                self._md['references'].append(data)
            else:
                raise UpdateNoticeException('No reference element found')

    def _parse_pkglist(self, elem):
        """
        Parse the package list::

            <!ELEMENT pkglist (collection+)>
            <!ELEMENT collection (name?, package+)>
                <!ATTLIST collection short CDATA #IMPLIED>
                <!ATTLIST collection name CDATA #IMPLIED>
            <!ELEMENT name (#PCDATA)>
        """
        for collection in elem:
            data = { 'packages' : [] }
            if 'short' in collection.attrib:
                data['short'] = collection.attrib.get('short')
            for item in collection:
                if item.tag == 'name':
                    data['name'] = item.text
                elif item.tag == 'package':
                    data['packages'].append(self._parse_package(item))
            self._md['pkglist'].append(data)

    def _parse_package(self, elem):
        """
        Parse an individual package::

            <!ELEMENT package (filename, sum, reboot_suggested)>
                <!ATTLIST package name CDATA #REQUIRED>
                <!ATTLIST package version CDATA #REQUIRED>
                <!ATTLIST package release CDATA #REQUIRED>
                <!ATTLIST package arch CDATA #REQUIRED>
                <!ATTLIST package epoch CDATA #REQUIRED>
                <!ATTLIST package src CDATA #REQUIRED>
            <!ELEMENT reboot_suggested (#PCDATA)>
            <!ELEMENT filename (#PCDATA)>
            <!ELEMENT sum (#PCDATA)>
                <!ATTLIST sum type (md5|sha1) "sha1">
        """
        package = {}
        for pkgfield in ('arch', 'epoch', 'name', 'version', 'release', 'src'):
            package[pkgfield] = elem.attrib.get(pkgfield)

        #  Bad epoch and arch data is the most common (missed) screwups.
        # Deal with bad epoch data.
        if not package['epoch'] or package['epoch'][0] not in '0123456789':
            package['epoch'] = None

        for child in elem:
            if child.tag == 'filename':
                package['filename'] = child.text
            elif child.tag == 'sum':
                package['sum'] = (child.attrib.get('type'), child.text)
            elif child.tag == 'reboot_suggested':
                self._md['reboot_suggested'] = True
        return package

    def xml(self):
        """Generate the xml for this update notice object"""
        msg = """
<update from="%s" status="%s" type="%s" version="%s">
  <id>%s</id>
  <title>%s</title>
  <release>%s</release>
  <issued date="%s"/>
  <description>%s</description>\n""" % (to_xml(self._md['from']),
                to_xml(self._md['status']), to_xml(self._md['type']),
                to_xml(self._md['version']), to_xml(self._md['update_id']),
                to_xml(self._md['title']), to_xml(self._md['release']),
                to_xml(self._md['issued'], attrib=True),
                to_xml(self._md['description']))
        if self._md['updated']:
            # include the updated date in the generated xml
            msg += """ <updated date="%s"/>\n""" % (to_xml(self._md['updated'], attrib=True))
        if self._md['summary']:
            msg += """  <summary>%s</summary>\n""" % (to_xml(self._md['summary']))
        if self._md['solution']:
            msg += """  <solution>%s</solution>\n""" % (to_xml(self._md['solution']))
        if self._md['rights']:
            msg += """  <rights>%s</rights>\n""" % (to_xml(self._md['rights']))        
        if self._md['severity']:
            msg += """  <severity>%s</severity>\n""" % (to_xml(self._md['severity']))

        if self._md['references']:
            msg += """  <references>\n"""
            for ref in self._md['references']:
                if ref['title']:
                    msg += """    <reference href="%s" id="%s" title="%s" type="%s"/>\n""" % (
                    to_xml(ref['href'], attrib=True), to_xml(ref['id'], attrib=True),
                    to_xml(ref['title'], attrib=True), to_xml(ref['type'], attrib=True))
                else:
                    msg += """    <reference href="%s" id="%s"  type="%s"/>\n""" % (
                    to_xml(ref['href'], attrib=True), to_xml(ref['id'], attrib=True),
                    to_xml(ref['type'], attrib=True))

            msg += """  </references>\n"""
        
        if self._md['pkglist']:
            msg += """  <pkglist>\n"""
            for coll in self._md['pkglist']:
                msg += """    <collection short="%s">\n      <name>%s</name>\n""" % (
                      to_xml(coll['short'], attrib=True),
                      to_xml(coll['name']))
  
                for pkg in coll['packages']:
                    msg += """      <package arch="%s" name="%s" release="%s" src="%s" version="%s" epoch="%s">
        <filename>%s</filename>
      </package>\n""" % (to_xml(pkg['arch'], attrib=True),
                                to_xml(pkg['name'], attrib=True),
                                to_xml(pkg['release'], attrib=True),
                                to_xml(pkg['src'], attrib=True),
                                to_xml(pkg['version'], attrib=True),
                                to_xml(pkg['epoch'] or '0', attrib=True),
                                to_xml(pkg['filename']))
                msg += """    </collection>\n"""
            msg += """  </pkglist>\n"""
        msg += """</update>\n"""
        return msg

def _rpm_tup_vercmp(tup1, tup2):
    """ Compare two "std." tuples, (n, a, e, v, r). """
    return rpmUtils.miscutils.compareEVR((tup1[2], tup1[3], tup1[4]),
                                         (tup2[2], tup2[3], tup2[4]))

class UpdateMetadata(object):

    """
    The root update metadata object.
    """

    def __init__(self, repos=[], logger=None, vlogger=None):
        self._notices = {}
        self._cache = {}    # a pkg nvr => notice cache for quick lookups
        self._no_cache = {}    # a pkg name only => notice list
        self._repos = []    # list of repo ids that we've parsed

        self._logger  = logger
        self._vlogger = vlogger

        for repo in repos:
            try: # attempt to grab the updateinfo.xml.gz from the repodata
                self.add(repo)
            except Errors.RepoMDError:
                continue # No metadata found for this repo

        self.arch_storage = ArchStorage()
        self.archlist = self.arch_storage.archlist

    def get_notices(self, name=None):
        """ Return all notices. """
        if name is None:
            return self._notices.values()
        return name in self._no_cache and self._no_cache[name] or []

    notices = property(get_notices)

    def get_notice(self, nvr):
        """
        Retrieve an update notice for a given (name, version, release) string
        or tuple.
        """
        if type(nvr) in (type([]), type(())):
            nvr = '-'.join(nvr)
        return self._cache.get(nvr) or None

    #  The problem with the above "get_notice" is that not everyone updates
    # daily. So if you are at pkg-1, pkg-2 has a security notice, and pkg-3
    # has a BZ fix notice. All you can see is the BZ notice for the new "pkg-3"
    # with the above.
    #  So now instead you lookup based on the _installed_ pkg.pkgtup, and get
    # two notices, in order: [(pkgtup-3, notice), (pkgtup-2, notice)]
    # the reason for the sorting order is that the first match will give you
    # the minimum pkg you need to move to.
    def get_applicable_notices(self, pkgtup):
        """
        Retrieve any update notices which are newer than a
        given std. pkgtup (name, arch, epoch, version, release) tuple.
        Returns: list of (pkgtup, notice) that are newer than the given pkgtup,
                 in the order of newest pkgtups first.
        """
        oldpkgtup = pkgtup
        name = oldpkgtup[0]
        arch = oldpkgtup[1]
        ret = []
        other_arch_list = []
        notices = set()
        for notice in self.get_notices(name):
            for upkg in notice['pkglist']:
                for pkg in upkg['packages']:
                    other_arch = False
                    if pkg['name'] != name or pkg['arch'] != arch:
                        if (notice not in notices and pkg['name'] == name and pkg['arch'] in self.archlist):
                            other_arch = True
                        else:
                            continue
                    pkgtup = (pkg['name'], pkg['arch'], pkg['epoch'] or '0',
                              pkg['version'], pkg['release'])
                    if _rpm_tup_vercmp(pkgtup, oldpkgtup) <= 0:
                        continue
                    if other_arch:
                        other_arch_list.append((pkgtup, notice))
                    else:
                        ret.append((pkgtup, notice))
                        notices.add(notice)
        for pkgtup, notice in other_arch_list:
            if notice not in notices:
                ret.append((pkgtup, notice))
        ret.sort(cmp=_rpm_tup_vercmp, key=lambda x: x[0], reverse=True)
        return ret

    def add_notice(self, un):
        """ Add an UpdateNotice object. This should be fully populated with
            data, esp. update_id and pkglist/packages. """
        if not un or not un["update_id"]:
            return False

        #  This is "special", the main thing we want to deal with here is
        # having one errata that has multiple packages in it rpmA and rpmB, but
        # the packages are in repos. repoA and repoB. So instead of doing a
        # single errata pointing to both rpmA and rpmB and put the same thing
        # in both repodata (which is legal, and works fine) people want to have
        # just the packages from repoA in the repodata for repoA and vice versa.
        if un['update_id'] in self._notices:
            oun = self._notices[un['update_id']]
            if oun != un:
                return False

            # Ok, main parts of errata are the same, so now merge references:
            seen = set()
            for ref in oun['references']:
                seen.add(ref['id'])
            for ref in un['references']:
                if ref['id'] in seen:
                    continue
                seen.add(ref['id'])
                oun['references'].append(ref)

            # ...and pkglist (this assumes that a pkglist name XYZ is the same):
            seen = set()
            for pkg in oun['pkglist']:
                seen.add(pkg['name'])
            for pkg in un['pkglist']:
                if pkg['name'] in seen:
                    continue
                seen.add(pkg['name'])
                oun['pkglist'].append(pkg)

            un = oun

        self._notices[un['update_id']] = un
        for pkg in un['pkglist']:
            for filedata in pkg['packages']:
                self._cache['%s-%s-%s' % (filedata['name'],
                                          filedata['version'],
                                          filedata['release'])] = un
                no = self._no_cache.setdefault(filedata['name'], set())
                no.add(un)

        return True

    def add(self, obj, mdtype='updateinfo'):
        """ Parse a metadata from a given YumRepository, file, or filename. """

        def _rid(repoid, fmt=_(' (from %s)')):
            if not repoid:
                return ''
            return fmt % repoid

        if not obj:
            raise UpdateNoticeException
        repoid = None
        if type(obj) in (type(''), type(u'')):
            unfile = decompress(obj)
            infile = open(unfile, 'rt')

        elif isinstance(obj, YumRepository):
            if obj.id not in self._repos:
                repoid = obj.id
                self._repos.append(obj.id)
                md = obj.retrieveMD(mdtype)
                if not md:
                    raise UpdateNoticeException()
                unfile = repo_gen_decompress(md, 'updateinfo.xml')
                infile = open(unfile, 'rt')
        elif isinstance(obj, FakeRepository):
            raise Errors.RepoMDError, "No updateinfo for local pkg"
        else:   # obj is a file object
            infile = obj

        have_dup = False
        for event, elem in safe_iterparse(infile, logger=self._logger):
            if elem.tag == 'update':
                try:
                    un = UpdateNotice(elem, repoid, self._vlogger)
                except UpdateNoticeException, e:
                    msg = _("An update notice%s is broken, skipping.") % _rid(repoid)
                    if self._vlogger:
                        self._vlogger.log(logginglevels.DEBUG_1, "%s", msg)
                    else:
                        print >> sys.stderr, msg
                    continue

                if not self.add_notice(un):
                    msg = _("Update notice %s%s is broken, or a bad duplicate, skipping.") % (un['update_id'], _rid(repoid))
                    if not have_dup:
                        msg += _('\nYou should report this problem to the owner of the %srepository.') % _rid(repoid, "%s ")
                        msg += _('\nTo help pinpoint the issue, please attach the output of "yum updateinfo --verbose" to the report.')
                    have_dup = True
                    if self._vlogger:
                        self._vlogger.warn("%s", msg)
                    else:
                        print >> sys.stderr, msg

    def __unicode__(self):
        ret = u''
        for notice in self.notices:
            ret += unicode(notice)
        return ret
    def __str__(self):
        return to_utf8(self.__unicode__())

    def xml(self, fileobj=None):
        msg = """<?xml version="1.0"?>\n<updates>"""
        if fileobj:
            fileobj.write(msg)

        for notice in self._notices.values():
            if fileobj:
                fileobj.write(notice.xml())
            else:
                msg += notice.xml()

        end = """</updates>\n"""
        if fileobj:
            fileobj.write(end)
        else:
            msg += end

        if fileobj:
            return

        return msg


def main():
    """ update_md test function. """
    import yum.misc

    yum.misc.setup_locale()
    def usage():
        print >> sys.stderr, "Usage: %s <update metadata> ..." % sys.argv[0]
        sys.exit(1)

    if len(sys.argv) < 2:
        usage()

    try:
        print sys.argv[1]
        um = UpdateMetadata()
        for srcfile in sys.argv[1:]:
            um.add(srcfile)
        print unicode(um)
    except IOError:
        print >> sys.stderr, "%s: No such file:\'%s\'" % (sys.argv[0],
                                                          sys.argv[1:])
        usage()

if __name__ == '__main__':
    main()