|
|
|
@ -0,0 +1,76 @@ |
|
|
|
from bs4 import BeautifulSoup |
|
|
|
|
|
|
|
from .error import ValidationError |
|
|
|
|
|
|
|
|
|
|
|
class Selector(object): |
|
|
|
def __init__(self, selector, required=False, as_list=False): |
|
|
|
self.selector = selector |
|
|
|
self.as_list = as_list |
|
|
|
self.required = required |
|
|
|
|
|
|
|
def _get_value(self, elm): |
|
|
|
if elm is None: |
|
|
|
return None |
|
|
|
if 'content' in elm.attrs: |
|
|
|
return elm.attrs['content'] |
|
|
|
return elm.text |
|
|
|
|
|
|
|
def resolve(self, soup): |
|
|
|
if isinstance(soup, (str, bytes)): |
|
|
|
soup = BeautifulSoup(soup, 'html.parser') |
|
|
|
value = None |
|
|
|
if self.as_list: |
|
|
|
value = [self._get_value(elm) for elm in soup.select(self.selector)] |
|
|
|
else: |
|
|
|
elm = soup.select_one(self.selector) |
|
|
|
value = self._get_value(elm) |
|
|
|
|
|
|
|
if not value and self.required: |
|
|
|
raise ValidationError( |
|
|
|
'Expected at least 1 element matching selector "{selector}", none was found' |
|
|
|
.format(selector=self.selector) |
|
|
|
) |
|
|
|
return value |
|
|
|
|
|
|
|
|
|
|
|
class AttrSelector(Selector): |
|
|
|
def __init__(self, selector, attribute, *args, **kwargs): |
|
|
|
super(AttrSelector, self).__init__(selector=selector, *args, **kwargs) |
|
|
|
self.attribute = attribute |
|
|
|
|
|
|
|
def _get_value(self, elm): |
|
|
|
if elm is None: |
|
|
|
return None |
|
|
|
return elm.attrs.get(self.attribute) |
|
|
|
|
|
|
|
|
|
|
|
class SchemaSelector(Selector): |
|
|
|
def __init__(self, selector, schema, *args, **kwargs): |
|
|
|
super(AttrSelector, self).__init__(selector=selector, *args, **kwargs) |
|
|
|
self.schema = schema |
|
|
|
|
|
|
|
def _get_value(self, elm): |
|
|
|
return self.schema.resolve(elm) |
|
|
|
|
|
|
|
|
|
|
|
class AnySelector(Selector): |
|
|
|
def __init__(self, selectors, required=False): |
|
|
|
self.selectors = selectors |
|
|
|
self.required = required |
|
|
|
|
|
|
|
def resolve(self, soup): |
|
|
|
for selector in self.selectors: |
|
|
|
try: |
|
|
|
value = selector.resolve(soup) |
|
|
|
if value: |
|
|
|
return value |
|
|
|
except ValidationError: |
|
|
|
# DEV: It is ok if one fails, we will try the next one |
|
|
|
pass |
|
|
|
|
|
|
|
if self.required: |
|
|
|
raise ValidationError( |
|
|
|
'Expected at least 1 element matching selector "{selectors}", none was found' |
|
|
|
.format(selectors=self.selectors) |
|
|
|
) |