You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 

76 lines
2.3 KiB

from bs4 import BeautifulSoup
from .error import ValidationError
class Selector(object):
    """Pull a value out of an HTML document using a CSS selector.

    For each matched element the ``content`` attribute is preferred when the
    element has one; otherwise the element's text is used.
    """

    def __init__(self, selector, required=False, as_list=False):
        """Remember the CSS selector and how it should be resolved.

        :param selector: CSS selector passed to ``select`` / ``select_one``.
        :param required: when true, ``resolve`` raises ``ValidationError``
            if the resolved value is falsy.
        :param as_list: when true, ``resolve`` returns one value per matched
            element instead of the value of the first match only.
        """
        self.selector = selector
        self.required = required
        self.as_list = as_list

    def _get_value(self, elm):
        """Return the value of a single matched element.

        :param elm: a matched element exposing ``attrs`` and ``text``, or
            ``None`` when nothing matched.
        :returns: ``None`` for ``None``; otherwise the ``content`` attribute
            if the element has one, else the element's text.
        """
        if elm is None:
            return None
        attributes = elm.attrs
        return attributes['content'] if 'content' in attributes else elm.text

    def resolve(self, soup):
        """Run the selector against ``soup`` and return what it finds.

        :param soup: raw markup (``str`` or ``bytes``), which is parsed with
            BeautifulSoup's ``html.parser``, or an already parsed object
            exposing ``select`` and ``select_one``.
        :returns: a list of values when ``as_list`` is set, otherwise the
            value of the first match (``None`` if there is no match).
        :raises ValidationError: if ``required`` is set and the result is
            falsy (no match, empty list, or empty value).
        """
        if isinstance(soup, (str, bytes)):
            soup = BeautifulSoup(soup, 'html.parser')

        if self.as_list:
            value = list(map(self._get_value, soup.select(self.selector)))
        else:
            value = self._get_value(soup.select_one(self.selector))

        # Anything truthy, or any result at all for an optional selector,
        # is handed back untouched.
        if value or not self.required:
            return value

        message = 'Expected at least 1 element matching selector "{selector}", none was found'
        raise ValidationError(message.format(selector=self.selector))
class AttrSelector(Selector):
    """Selector whose value is a named attribute of the matched element."""

    def __init__(self, selector, attribute, *args, **kwargs):
        """Store the attribute name and forward everything else to ``Selector``.

        :param selector: CSS selector used to find the element(s).
        :param attribute: name of the attribute to read from each match.
        :param args: extra positional arguments for ``Selector.__init__``.
        :param kwargs: extra keyword arguments for ``Selector.__init__``.
        """
        # ``selector`` is forwarded positionally on purpose: combining a
        # ``selector=`` keyword with ``*args`` made any extra positional
        # argument bind to ``selector`` as well, raising
        # "got multiple values for argument 'selector'".
        super(AttrSelector, self).__init__(selector, *args, **kwargs)
        self.attribute = attribute

    def _get_value(self, elm):
        """Return the configured attribute of ``elm``.

        :param elm: a matched element exposing ``attrs``, or ``None``.
        :returns: the attribute value, or ``None`` when ``elm`` is ``None``
            or the element does not carry that attribute.
        """
        if elm is None:
            return None
        return elm.attrs.get(self.attribute)
class SchemaSelector(Selector):
    """Selector that resolves each matched element through a nested schema."""

    def __init__(self, selector, schema, *args, **kwargs):
        """Store the nested schema and forward everything else to ``Selector``.

        :param selector: CSS selector used to find the element(s).
        :param schema: object exposing ``resolve(element)``; it is called
            once for every matched element.
        :param args: extra positional arguments for ``Selector.__init__``.
        :param kwargs: extra keyword arguments for ``Selector.__init__``.
        """
        # ``super`` must name this class. Naming an unrelated sibling class
        # makes ``super(type, obj)`` raise TypeError because ``obj`` is not
        # an instance of that type. ``selector`` is passed positionally so
        # that extra positional arguments do not collide with it.
        super(SchemaSelector, self).__init__(selector, *args, **kwargs)
        self.schema = schema

    def _get_value(self, elm):
        """Resolve ``elm`` through the nested schema.

        :param elm: a matched element, or ``None`` when nothing matched.
        :returns: ``None`` when ``elm`` is ``None``; otherwise whatever
            ``schema.resolve(elm)`` returns.
        """
        # Nothing matched: report "no value" the way the other selectors do
        # instead of handing None to the schema.
        # NOTE(review): assumes the schema is not expected to handle None
        # itself - confirm against the schema implementation.
        if elm is None:
            return None
        return self.schema.resolve(elm)
class AnySelector(Selector):
    """Try several selectors in order and use the first truthy result.

    ``Selector.__init__`` is not invoked, so instances carry only
    ``selectors`` and ``required``.
    """

    def __init__(self, selectors, required=False):
        """Store the candidate selectors.

        :param selectors: iterable of objects exposing ``resolve(soup)``;
            they are tried in iteration order.
        :param required: when true, ``resolve`` raises ``ValidationError``
            if no candidate produces a truthy value.
        """
        self.required = required
        self.selectors = selectors

    def resolve(self, soup):
        """Return the first truthy value produced by any candidate selector.

        :param soup: passed unchanged to each candidate's ``resolve``.
        :returns: the first truthy value, or ``None`` when there is none and
            the selector is not required.
        :raises ValidationError: if ``required`` is set and no candidate
            produced a truthy value.
        """
        for candidate in self.selectors:
            try:
                found = candidate.resolve(soup)
            except ValidationError:
                # DEV: a failing candidate is fine, move on to the next one
                continue
            if found:
                return found

        if not self.required:
            return None

        message = 'Expected at least 1 element matching selector "{selectors}", none was found'
        raise ValidationError(message.format(selectors=self.selectors))