"""The basic Content and ContentSource classes.
.. inheritance-diagram:: ContentSource Content PostSource AnnotatorSource assembl.models.post.Post assembl.models.post.LocalPost assembl.models.post.SynthesisPost assembl.models.post.WidgetPost assembl.models.post.IdeaProposalPost assembl.models.post.ImportedPost assembl.models.mail.AbstractMailbox assembl.models.mail.IMAPMailbox assembl.models.mail.MailingList assembl.models.mail.AbstractFilesystemMailbox assembl.models.mail.MaildirMailbox assembl.models.mail.Email assembl.models.annotation.Webpage
    :parts: 1
"""
from future.utils import native_str_to_bytes, bytes_to_native_str
from builtins import hex
import logging
from abc import abstractmethod
import re
import uuid
from sqlalchemy import (
    Column,
    Integer,
    SmallInteger,
    Boolean,
    UnicodeText,
    String,
    Unicode,
    DateTime,
    ForeignKey,
)
from sqlalchemy.orm import relationship, backref, aliased
from ..lib import config
from sqla_rdfbridge.mapping import PatternIriClass
from ..lib.sqla import CrudOperation
from ..lib.model_watcher import get_model_watcher
from ..lib.utils import get_global_base_url
from . import Base, DiscussionBoundBase, OriginMixin
from .langstrings import (LangString, LangStringEntry)
from ..semantic.virtuoso_mapping import QuadMapPatternS
from ..auth import (
    CrudPermissions, P_ADD_POST, P_READ, P_ADMIN_DISC, P_EDIT_POST)
from ..semantic.namespaces import (
    SIOC, CATALYST, ASSEMBL, DCTERMS, QUADNAMES, FOAF)
from .discussion import Discussion
from ..lib.history_mixin import TombstonableOriginMixin
from ..lib.clean_input import sanitize_text, sanitize_html
# Module-level logger, named after this module.
log = logging.getLogger(__name__)
class ContentSource(DiscussionBoundBase, OriginMixin):
    """
    A ContentSource is where any outside content comes from.

    The mapping is polymorphic on ``type``. This base class does not know
    how to read content: :py:meth:`make_reader` raises
    ``NotImplementedError`` unless a subclass overrides it.
    """
    __tablename__ = "content_source"
    __external_typename = "Container"
    rdf_class = SIOC.Container

    id = Column(Integer, primary_key=True,
                info={'rdf': QuadMapPatternS(None, ASSEMBL.db_id)})
    name = Column(UnicodeText, nullable=False)
    # Polymorphic discriminator (see __mapper_args__ below).
    type = Column(String(60), nullable=False)
    discussion_id = Column(Integer, ForeignKey(
        'discussion.id',
        ondelete='CASCADE',
        onupdate='CASCADE'
    ), nullable=False, index=True)
    # Error bookkeeping; all three are cleared together by reset_errors().
    connection_error = Column(SmallInteger)
    error_description = Column(String)
    error_backoff_until = Column(DateTime)

    @classmethod
    def special_quad_patterns(cls, alias_maker, discussion_id):
        """Return the extra quad patterns for this class.

        A single pattern is produced, relating the discussion IRI to the
        source IRI through ``CATALYST.uses_source``. Neither argument is
        used by this implementation.
        """
        return [
            QuadMapPatternS(
                Discussion.iri_class().apply(cls.discussion_id),
                CATALYST.uses_source,
                cls.iri_class().apply(cls.id),
                name=QUADNAMES.uses_source,
                # `!= None` is deliberate: on a SQLAlchemy column it builds
                # a SQL expression instead of comparing in Python.
                conditions=(cls.discussion_id != None,)),  # noqa: E711
        ]

    discussion = relationship(
        "Discussion",
        backref=backref(
            'sources', order_by="ContentSource.creation_date",
            cascade="all, delete-orphan"),
        info={'rdf': QuadMapPatternS(None, ASSEMBL.in_conversation)})

    __mapper_args__ = {
        'polymorphic_identity': 'content_source',
        'polymorphic_on': type,
        'with_polymorphic': '*'
    }

    retypeable_as = ("IMAPMailbox", "MailingList", "AbstractMailbox",
                     "AbstractFilesystemMailbox", "AnnotatorSource",
                     "PostSource", "FeedPostSource", "LoomioPostSource",
                     "FacebookGenericSource", "FacebookGroupSource",
                     "FacebookPagePostsSource", "FacebookPageFeedSource",
                     "FacebookSinglePostSource", "EdgeSenseDrupalSource")

    # NOTE(review): @abstractmethod is only enforced when the metaclass
    # derives from ABCMeta -- confirm; the body below is a usable default.
    @abstractmethod
    def generate_message_id(self, source_post_id):
        """Return a globally unique message_id for a post of this source.

        The default returns ``source_post_id`` unchanged: the id is locally
        unique within the source, and in many cases already globally unique.
        """
        return source_post_id

    # Matches every character that flatten_source_post_id() must escape.
    _non_email_chars = re.compile(r'[^!#-\'\*\+\-\./-9=\?A-Z\^_`a-z\|\~]', re.U)

    @classmethod
    def flatten_source_post_id(cls, source_post_id, extra_length=0):
        """Make a source_post_id usable as part 1 of a message_id.

        Every character matched by ``_non_email_chars`` is replaced by
        ``_`` followed by its code point in hexadecimal. When the result
        plus ``extra_length`` exceeds 64 characters, it is truncated and a
        URL-safe base64 MD5 digest of the original id is appended.

        :param source_post_id: the id as given by the source.
        :param extra_length: number of characters the caller will add.
        :return: the flattened id.
        """
        def _escape(match):
            return '_' + hex(ord(match.group()))[2:]

        sanitized = cls._non_email_chars.sub(_escape, source_post_id)
        if len(sanitized) + extra_length > 64:
            # 64 is max according to RFC 5322
            # NOTE(review): the 64-octet local-part limit seems to come from
            # RFC 5321 rather than RFC 5322 -- confirm.
            # cut it short and add a digest of original
            import hashlib
            import base64
            md5 = hashlib.md5()
            md5.update(native_str_to_bytes(source_post_id, 'utf-8'))
            digest = bytes_to_native_str(
                base64.urlsafe_b64encode(md5.digest()))
            # Leave room for the digest, the "_" separator and extra_length.
            keep = max(0, 64 - len(digest) - extra_length - 1)
            sanitized = sanitized[:keep]
            if sanitized:
                sanitized += "_" + digest
            else:
                sanitized = digest
        return sanitized

    def import_content(self, only_new=True):
        """Wake the source reader for this source.

        :param only_new: when false, the reader is asked to reimport.
        """
        from assembl.tasks.source_reader import wake
        wake(self.id, reimport=not only_new)

    def make_reader(self):
        """Not implemented on the base class."""
        raise NotImplementedError()

    def get_discussion_id(self):
        """Return the discussion id, from the column or the relationship."""
        return self.discussion_id or self.discussion.id

    @property
    def connection_error_as_text(self):
        """Name of the ``ReaderStatus`` for connection_error, or None."""
        from ..tasks.source_reader import ReaderStatus
        if self.connection_error is None:
            return None
        return ReaderStatus(self.connection_error).name

    @classmethod
    def get_discussion_conditions(cls, discussion_id, alias_maker=None):
        """Return the conditions restricting rows to the given discussion."""
        return (cls.discussion_id == discussion_id,)

    # Cannot be readable to all, because subclasses contain passwords
    crud_permissions = CrudPermissions(P_ADMIN_DISC, P_ADMIN_DISC)

    def reset_errors(self):
        """Clear the stored connection error, description and back-off."""
        self.connection_error = None
        self.error_description = None
        self.error_backoff_until = None
class PostSource(ContentSource):
    """
    A Discussion PostSource is where commentary that is handled in the form of
    internal posts comes from.

    A discussion source should have a method for importing all content, as well
    as only importing new content. Maybe the standard interface for this should
    be `source.import()`.
    """
    __tablename__ = "post_source"
    rdf_class = ASSEMBL.PostSource

    id = Column(Integer, ForeignKey(
        'content_source.id',
        ondelete='CASCADE',
        onupdate='CASCADE'
    ), primary_key=True)
    last_import = Column(DateTime)

    __mapper_args__ = {
        'polymorphic_identity': 'post_source',
    }

    def get_discussion_id(self):
        """Return the discussion id, from the column or the relationship."""
        return self.discussion_id or self.discussion.id

    def get_default_prepended_id(self):
        """Return the prefix used to make incoming post ids unique.

        Used for PostSources whose incoming posts cannot guarantee that
        ImportedPost.source_post_id is unique; in that case Post.message_id,
        which is a globally unique value, maintains uniqueness integrity by
        calling this function. Must be implemented by subclasses that will
        not have unique ids on their incoming posts. The default is the
        empty string.
        """
        return ""

    @property
    def number_of_imported_posts(self):
        """Count of ImportedPost rows of this source with no tombstone_date."""
        from .post import ImportedPost
        query = self.db.query(ImportedPost).filter_by(
            source_id=self.id, tombstone_date=None)
        return query.count()

    @classmethod
    def get_discussion_conditions(cls, discussion_id, alias_maker=None):
        """Return the conditions restricting rows to the given discussion."""
        return (cls.discussion_id == discussion_id,)

    def send_post(self, post):
        """ Send a new post in the discussion to the source.

        The base implementation only logs a warning naming the class.
        """
        # log.warn is a deprecated alias of log.warning; the arguments are
        # passed lazily so the message is only formatted if it is emitted.
        log.warning(
            "Source %s did not implement PostSource::send_post()",
            self.__class__.__name__)
class AnnotatorSource(ContentSource):
    """
    A source of content coming from annotator
    """
    __tablename__ = "annotator_source"

    id = Column(Integer, ForeignKey(
        'content_source.id',
        ondelete='CASCADE',
        onupdate='CASCADE'
    ), primary_key=True)

    __mapper_args__ = {
        'polymorphic_identity': 'annotator_source',
    }

    def generate_message_id(self, source_post_id):
        """Return ``source_post_id``, or a freshly generated id if it is falsy.

        The generated id is a uuid1 hex string followed by ``_annotator@``
        and the configured ``public_hostname``.
        """
        if source_post_id:
            return source_post_id
        return (uuid.uuid1().hex +
                "_annotator@" + config.get('public_hostname'))

    def make_reader(self):
        """Return None: this source is only pushed to, never read."""
        # only push
        return None
class ContentSourceIDs(Base):
    """
    A table that keeps track of the number of external identities that
    an internal post can be exported to.

    A stepping-stone to having Sinks
    """
    __tablename__ = 'content_source_ids'

    id = Column(Integer, primary_key=True)

    # The source the post was pushed to.
    source_id = Column(
        Integer, ForeignKey(
            'content_source.id', onupdate='CASCADE', ondelete='CASCADE'),
        nullable=False, index=True)
    source = relationship('ContentSource', backref=backref(
                          'pushed_messages',
                          cascade='all, delete-orphan'))

    # The internal content that was pushed.
    post_id = Column(
        Integer, ForeignKey(
            'content.id', onupdate='CASCADE', ondelete='CASCADE'),
        nullable=False, index=True)
    post = relationship('Content',
                        backref=backref('post_sink_associations',
                                        cascade='all, delete-orphan'))

    # Identifier of the message within the source.
    message_id_in_source = Column(String(256), nullable=False, index=True)
class Content(TombstonableOriginMixin, DiscussionBoundBase):
    """
    Content is a polymorphic class to describe what is imported from a Source.

    The body and subject properly belong to the Post but were moved here to
    optimize the most common case.
    """
    __tablename__ = "content"
    __external_typename = "SPost"
    # __table_cls__ = TableWithTextIndex
    rdf_class = SIOC.Post

    id = Column(Integer, primary_key=True,
                info={'rdf': QuadMapPatternS(None, ASSEMBL.db_id)})
    # Polymorphic discriminator (see __mapper_args__ below).
    type = Column(String(60), nullable=False)
    discussion_id = Column(Integer, ForeignKey(
        'discussion.id',
        ondelete='CASCADE',
        onupdate='CASCADE',
    ),
        nullable=False, index=True)
    discussion = relationship(
        "Discussion",
        backref=backref(
            'posts', order_by="Content.creation_date",
            cascade="all, delete-orphan"),
        info={'rdf': QuadMapPatternS(None, ASSEMBL.in_conversation)}
    )

    # Subject and body are both stored as LangStrings.
    subject_id = Column(Integer, ForeignKey(LangString.id), index=True)
    body_id = Column(Integer, ForeignKey(LangString.id), index=True)
    subject = relationship(
        LangString,
        primaryjoin=subject_id == LangString.id,
        backref=backref("subject_of_post", lazy="dynamic"),
        single_parent=True, lazy="joined",
        cascade="all, delete-orphan")
    body = relationship(
        LangString,
        primaryjoin=body_id == LangString.id,
        backref=backref("body_of_post", lazy="dynamic"),
        single_parent=True, lazy="joined",
        cascade="all, delete-orphan")

    def __init__(self, *args, **kwargs):
        """Create the content, defaulting subject and body to an empty LangString.

        A default is only supplied when neither the relationship value nor
        the matching ``*_id`` keyword was given.
        """
        for field in ('subject', 'body'):
            if (kwargs.get(field, None) is None and
                    kwargs.get(field + '_id', None) is None):
                kwargs[field] = LangString.EMPTY()
        super(Content, self).__init__(*args, **kwargs)

    @classmethod
    def subqueryload_options(cls):
        """Options for subquery loading of subject and body.

        Use when there are many languages in the discussion.
        """
        return (
            LangString.subqueryload_option(cls.subject),
            LangString.subqueryload_option(cls.body))

    @classmethod
    def joinedload_options(cls):
        """Options for joined loading of subject and body.

        Use when there are few languages in the discussion.
        """
        return (
            LangString.joinedload_option(cls.subject),
            LangString.joinedload_option(cls.body))

    @classmethod
    def best_locale_query(cls, locales):
        "BUGGY. Return a query that will load the post, best subject and best body for the given locales"
        # this failed in virtuoso, check now
        # Note that it fails with just body, and succeeds with subject.
        # Go figure. Fortunately not needed yet.
        subject_ls = aliased(LangString)
        body_ls = aliased(LangString)
        best_subject_sq = LangString.best_lang_old(locales)
        best_body_sq = LangString.best_lang_old(locales)
        return cls.default_db.query(
            cls, best_subject_sq, best_body_sq).join(
            subject_ls, cls.subject_id == subject_ls.id).join(
            best_subject_sq).join(
            body_ls, cls.body_id == body_ls.id).join(best_body_sq)

    # old_subject = Column("subject", CoerceUnicode(), server_default="",
    #     info={'rdf': QuadMapPatternS(None, DCTERMS.title)})
    # TODO: check HTML or text? SIOC.content should be text.
    # Do not give it for now, privacy reasons
    # old_body = Column("body", UnicodeText, server_default="")
    #    info={'rdf': QuadMapPatternS(None, SIOC.content)})
    # TODO: Refactor hidden into PublicationStates.WIDGET_SCOPED
    hidden = Column(Boolean, server_default='0')

    __mapper_args__ = {
        'polymorphic_identity': 'content',
        'polymorphic_on': 'type',
        'with_polymorphic': '*'
    }

    def populate_from_context(self, context):
        """Take the discussion from the context when none is set yet."""
        if not (self.discussion or self.discussion_id):
            self.discussion = context.get_instance_of_class(Discussion)
        super(Content, self).populate_from_context(context)

    def get_subject(self):
        """Return the subject LangString."""
        return self.subject

    def get_body(self):
        """Return the body LangString."""
        return self.body

    def get_title(self):
        """Return the subject LangString (same value as get_subject)."""
        return self.subject

    def safe_set_body(self, body):
        """Sanitize, in place, the ``value`` of every entry in ``body['entries']``.

        Entries go through ``sanitize_text`` when the body mime type is
        ``text/plain`` and through ``sanitize_html`` otherwise. This method
        only mutates the given mapping; it does not assign ``self.body``.
        """
        if self.get_body_mime_type() == 'text/plain':
            sanitize = sanitize_text
        else:
            sanitize = sanitize_html
        for entry in body['entries']:
            entry['value'] = sanitize(entry['value'])

    def safe_set_subject(self, subject):
        """Sanitize, in place, the entries of ``subject['entries']`` containing ``<``.

        This method only mutates the given mapping; it does not assign
        ``self.subject``.
        """
        for entry in subject['entries']:
            if "<" in entry['value']:
                entry['value'] = sanitize_text(entry['value'])

    def remove_translations(self):
        """Remove the translations of the subject and of the body, when set."""
        if self.subject:
            self.subject.remove_translations()
        # Guarded like the subject: other accessors of this class treat a
        # missing body as a normal case.
        if self.body:
            self.body.remove_translations()

    def get_body_mime_type(self):
        """ Return the format of the body, so the frontend will know how to
        display it.  Currently, only:
        text/plain (Understood as preformatted text)
        text/html (Understood as some subset of html)
        """
        return "text/plain"

    def get_body_as_html(self):
        """Return the body as a LangString of HTML, or None without a body.

        An HTML body is returned as is. A plain-text body yields a new
        LangString whose entries wrap each original value in a
        ``white-space: pre-wrap`` span. For any other mime type an error is
        logged and the body is returned unchanged.
        """
        mimetype = self.get_body_mime_type()
        body = self.body
        if not body:
            return None
        if mimetype == 'text/html':
            return body
        elif mimetype == "text/plain":
            ls = LangString()
            for e in body.entries:
                # Creating the entry with langstring=ls is what matters here;
                # the entry object itself is not used afterwards.
                # The closing tag now matches the opening <span>.
                _ = LangStringEntry(
                    value='<span style="white-space: pre-wrap">%s</span>' % (
                        e.value,),
                    langstring=ls, locale=e.locale)
            return ls
        else:
            # Lazy formatting: does not raise if mimetype is None.
            log.error("What is this mimetype?%s", mimetype)
            return body

    def get_original_subject(self):
        """Return the value of the subject's first original entry."""
        return self.subject.first_original().value

    def get_original_body_as_html(self):
        """Return the original body as an HTML string, or None without a body.

        Plain text is wrapped in a ``white-space: pre-wrap`` span. For an
        unknown mime type an error is logged and the original value is
        returned as is.
        """
        mimetype = self.get_body_mime_type()
        body = self.body
        if not body:
            return None
        original = body.first_original().value
        if mimetype == 'text/html':
            return original
        elif mimetype == "text/plain":
            # The closing tag now matches the opening <span>.
            return '<span style="white-space: pre-wrap">%s</span>' % (
                original,)
        else:
            log.error("What is this mimetype?%s", mimetype)
            # Return a string, as the other branches do, rather than the
            # LangString itself.
            return original

    def get_original_body_as_text(self):
        """Return the original body as text; '' when there is no body.

        An HTML body goes through ``sanitize_text``. For an unknown mime
        type an error is logged and the original value is returned as is.
        """
        mimetype = self.get_body_mime_type()
        body = self.body
        if not body:
            return ''
        body = body.first_original().value or ''
        if mimetype == 'text/plain':
            return body
        elif mimetype == 'text/html':
            return sanitize_text(body)
        else:
            log.error("What is this mimetype?%s", mimetype)
            return body

    def has_attachments(self):
        """Return the attachments when there are any, False otherwise."""
        return self.attachments or False

    def get_attachments_as_html_list(self):
        """Return one HTML link per attachment, sorted by document type.

        Documents whose mime type contains ``image`` are rendered as a
        linked ``<img>``; the others as a link labelled with the document
        title, or with its external URL when the title is empty.
        """
        # NOTE(review): URLs and titles are interpolated without HTML
        # escaping -- confirm they are sanitized upstream.
        img_style = "margin: 15px 0 15px 0; max-width: 500px; max-height: auto;"
        img_source = "<a href='%s' target='_blank' style='%s'><img src='%s'></img></a>"
        other_source = "<a href='%s' target='_blank'>%s</a>"
        attachments = self.attachments
        attachment_sorted = sorted(attachments, key=lambda a: a.document.type)
        output = []
        for attachment in attachment_sorted:
            document = attachment.document
            mime_type = document.mime_type
            if mime_type and 'image' in mime_type:
                output.append(img_source % (document.external_url, img_style,
                              document.external_url))
            else:
                title = document.title or document.external_url
                output.append(other_source % (document.external_url, title))
        return output

    def get_body_as_text(self):
        """Return the body as a LangString of text, or None without a body.

        A plain-text body is returned as is. An HTML body yields a new
        LangString whose entries hold the ``sanitize_text`` of each value.
        For any other mime type an error is logged and the body is
        returned unchanged.
        """
        mimetype = self.get_body_mime_type()
        body = self.body
        if not body:
            return None
        if mimetype == 'text/plain':
            return body
        elif mimetype == 'text/html':
            ls = LangString()
            for e in body.entries:
                # Creating the entry with langstring=ls is what matters here;
                # the entry object itself is not used afterwards.
                _ = LangStringEntry(
                    value=sanitize_text(e.value),
                    langstring=ls, locale=e.locale)
            return ls
        else:
            log.error("What is this mimetype?%s", mimetype)
            return body

    def maybe_translate(self, pref_collection):
        """Translate this content using the discussion's translation service.

        :param pref_collection: passed to ``PrefCollectionTranslationTable``.
        """
        from assembl.tasks.translate import (
            translate_content, PrefCollectionTranslationTable)
        service = self.discussion.translation_service()
        # NOTE(review): this tests the attribute itself, not the result of a
        # call; confirm whether canTranslate can really be None.
        if service.canTranslate is not None:
            translations = PrefCollectionTranslationTable(
                service, pref_collection)
            translate_content(
                self, translation_table=translations, service=service)

    def send_to_changes(self, connection=None, operation=CrudOperation.UPDATE,
                        discussion_id=None, view_def="changes"):
        """invoke the modelWatcher on creation"""
        super(Content, self).send_to_changes(
            connection, operation, discussion_id, view_def)
        watcher = get_model_watcher()
        if operation == CrudOperation.CREATE:
            watcher.processPostCreated(self.id)

    def get_discussion_id(self):
        """Return the discussion id, from the column or the relationship."""
        return self.discussion_id or self.discussion.id

    @property
    def exported_to_sources(self):
        """Generic URIs of the sources found in post_sink_associations."""
        return [ContentSource.uri_generic(s.source_id)
                for s in self.post_sink_associations]

    @classmethod
    def get_discussion_conditions(cls, discussion_id, alias_maker=None):
        """Return the conditions restricting rows to the given discussion."""
        return (cls.discussion_id == discussion_id,)

    @classmethod
    def special_quad_patterns(cls, alias_maker, discussion_id):
        """Return the extra quad patterns for this class.

        A single pattern is produced, giving each post a ``FOAF.homepage``
        built from the global base URL, the discussion slug and the post id.
        """
        discussion_alias = alias_maker.get_reln_alias(cls.discussion)
        return [
            QuadMapPatternS(
                None, FOAF.homepage,
                PatternIriClass(
                    QUADNAMES.post_external_link_iri,
                    # TODO: Use discussion.get_base_url.
                    # This should be computed outside the DB.
                    get_global_base_url() + '/%s/posts/local:SPost/%d', None,
                    ('slug', Unicode, False), ('id', Integer, False)).apply(
                    discussion_alias.slug, cls.id),
                name=QUADNAMES.post_external_link_map)
        ]

    def language_priors(self, translation_service):
        """Return a prior of 1 for each of the discussion's locales.

        Keys are the discussion locales passed through
        ``translation_service.asKnownLocale``.
        """
        discussion = self.discussion
        discussion_locales = discussion.discussion_locales
        return {translation_service.asKnownLocale(loc): 1
                for loc in discussion_locales}

    def guess_languages(self):
        """Confirm the locale of the original body and subject entries.

        The body is handled first. Unless its locale code is one of
        ``LocaleLabel.SPECIAL_LOCALES``, the priors used for the subject are
        scaled by 0.6 and the body's locale is given a prior of 1.
        """
        from .langstrings import LocaleLabel
        if self.discussion is None:
            self.discussion = Discussion.get(self.discussion_id)
        assert self.discussion
        ts = self.discussion.translation_service()
        priors = self.language_priors(ts)
        if self.body:
            body_original = self.body.first_original()
            ts.confirm_locale(body_original, priors)
        if self.subject:
            # body_original is only bound when self.body is truthy, hence the
            # leading test.
            if (self.body and body_original.locale_code
                    not in LocaleLabel.SPECIAL_LOCALES):
                # boost the body's language
                priors = {k: v * 0.6 for (k, v) in priors.items()}
                priors[body_original.locale_code] = 1
            subject_original = self.subject.first_original()
            ts.confirm_locale(subject_original, priors)

    def indirect_idea_content_links(self):
        """Return an empty list on the base class."""
        return []

    def widget_ideas(self):
        """Generic URIs of the ideas found in widget_idea_links."""
        from .idea import Idea
        return [Idea.uri_generic(wil.idea_id) for wil in self.widget_idea_links]

    crud_permissions = CrudPermissions(
            P_ADD_POST, P_READ, P_EDIT_POST, P_ADMIN_DISC)
# Register the LangString ownership load event for Content's 'subject' and
# 'body' attributes.
LangString.setup_ownership_load_event(Content, ['subject', 'body'])