import argparse
import fnmatch
import os.path
import re
import sys
import unicodedata
from collections import namedtuple
from enum import Enum

from . import shellpattern
from .helpers import clean_lines
from .helpers.errors import Error


def parse_patternfile_line(line, roots, ie_commands, fallback):
    """Parse a pattern-file line and act depending on which command it represents."""
    ie_command = parse_inclexcl_command(line, fallback=fallback)
    if ie_command.cmd is IECommand.RootPath:
        roots.append(ie_command.val)
    elif ie_command.cmd is IECommand.PatternStyle:
        fallback = ie_command.val
    else:
        # it is some kind of include/exclude command
        ie_commands.append(ie_command)
    return fallback


def load_pattern_file(fileobj, roots, ie_commands, fallback=None):
    if fallback is None:
        fallback = ShellPattern  # ShellPattern is defined later in this module
    for line in clean_lines(fileobj):
        fallback = parse_patternfile_line(line, roots, ie_commands, fallback)


def load_exclude_file(fileobj, patterns):
    for patternstr in clean_lines(fileobj):
        patterns.append(parse_exclude_pattern(patternstr))


class ArgparsePatternAction(argparse.Action):
    def __init__(self, nargs=1, **kw):
        super().__init__(nargs=nargs, **kw)

    def __call__(self, parser, args, values, option_string=None):
        parse_patternfile_line(values[0], args.paths, args.patterns, ShellPattern)


class ArgparsePatternFileAction(argparse.Action):
    def __init__(self, nargs=1, **kw):
        super().__init__(nargs=nargs, **kw)

    def __call__(self, parser, args, values, option_string=None):
        """Load and parse patterns from a file.
        Lines empty or starting with '#' after stripping whitespace on both line ends are ignored.
        """
        filename = values[0]
        try:
            with open(filename) as f:
                self.parse(f, args)
        except FileNotFoundError as e:
            raise Error(str(e))

    def parse(self, fobj, args):
        load_pattern_file(fobj, args.paths, args.patterns)


class ArgparseExcludeFileAction(ArgparsePatternFileAction):
    def parse(self, fobj, args):
        load_exclude_file(fobj, args.patterns)


class PatternMatcher:
    """Represents a collection of pattern objects to match paths against.

    *fallback* is a boolean value that *match()* returns if no matching patterns are found.

    """
    def __init__(self, fallback=None):
        self._items = []

        # Value to return from match function when none of the patterns match.
        self.fallback = fallback

        # optimizations
        self._path_full_patterns = {}  # full path -> return value

        # indicates whether the last match() call ended on a pattern for which
        # we should recurse into any matching folder.  Will be set to True or
        # False when calling match().
        self.recurse_dir = None

        # whether to recurse into directories when no match is found
        # TODO: allow modification as a config option?
        self.recurse_dir_default = True

        self.include_patterns = []

        # TODO: move this info to parse_inclexcl_command and store in PatternBase subclass?
        self.is_include_cmd = {
            IECommand.Exclude: False,
            IECommand.ExcludeNoRecurse: False,
            IECommand.Include: True
        }

    def empty(self):
        return not len(self._items) and not len(self._path_full_patterns)

    def _add(self, pattern, cmd):
        """*cmd* is an IECommand value.
        """
        if isinstance(pattern, PathFullPattern):
            key = pattern.pattern  # full, normalized path
            self._path_full_patterns[key] = cmd
        else:
            self._items.append((pattern, cmd))

    def add(self, patterns, cmd):
        """Add list of patterns to internal list. *cmd* indicates whether the
        pattern is an include/exclude pattern, and whether recursion should be
        done on excluded folders.
        """
        for pattern in patterns:
            self._add(pattern, cmd)

    def add_includepaths(self, include_paths):
        """Used to add inclusion-paths from args.paths (from commandline).
        """
        include_patterns = [parse_pattern(p, PathPrefixPattern) for p in include_paths]
        self.add(include_patterns, IECommand.Include)
        self.fallback = not include_patterns
        self.include_patterns = include_patterns

    def get_unmatched_include_patterns(self):
        """Note that this only returns patterns added via *add_includepaths* and it
           won't return PathFullPattern patterns as we do not match_count for them.
        """
        return [p for p in self.include_patterns
                if p.match_count == 0 and not isinstance(p, PathFullPattern)]

    def add_inclexcl(self, patterns):
        """Add list of patterns (of type CmdTuple) to internal list.
        """
        for pattern, cmd in patterns:
            self._add(pattern, cmd)

    def match(self, path):
        """Return True or False depending on whether *path* is matched.

        If no match is found among the patterns in this matcher, then the value
        in self.fallback is returned (defaults to None).

        """
        path = normalize_path(path).lstrip(os.path.sep)
        # do a fast lookup for full path matches (note: we do not count such matches):
        non_existent = object()
        value = self._path_full_patterns.get(path, non_existent)

        if value is not non_existent:
            # we have a full path match!
            self.recurse_dir = command_recurses_dir(value)
            return self.is_include_cmd[value]

        # this is the slow way, if we have many patterns in self._items:
        for (pattern, cmd) in self._items:
            if pattern.match(path, normalize=False):
                self.recurse_dir = pattern.recurse_dir
                return self.is_include_cmd[cmd]

        # by default we will recurse if there is no match
        self.recurse_dir = self.recurse_dir_default
        return self.fallback


def normalize_path(path):
    """normalize paths for MacOS (but do nothing on other platforms)"""
    # HFS+ converts paths to a canonical form, so users shouldn't be required to enter an exact match.
    # Windows and Unix filesystems allow different forms, so users always have to enter an exact match.
    return unicodedata.normalize('NFD', path) if sys.platform == 'darwin' else path


class PatternBase:
    """Shared logic for inclusion/exclusion patterns.
    """
    PREFIX = NotImplemented

    def __init__(self, pattern, recurse_dir=False):
        self.pattern_orig = pattern
        self.match_count = 0
        pattern = normalize_path(pattern)
        self._prepare(pattern)
        self.recurse_dir = recurse_dir

    def match(self, path, normalize=True):
        """Return a boolean indicating whether *path* is matched by this pattern.

        If normalize is True (default), the path will get normalized using normalize_path(),
        otherwise it is assumed that it already is normalized using that function.
        """
        if normalize:
            path = normalize_path(path)
        matches = self._match(path)
        if matches:
            self.match_count += 1
        return matches

    def __repr__(self):
        return f'{type(self)}({self.pattern})'

    def __str__(self):
        return self.pattern_orig

    def _prepare(self, pattern):
        "Should set the value of self.pattern"
        raise NotImplementedError

    def _match(self, path):
        raise NotImplementedError


class PathFullPattern(PatternBase):
    """Full match of a path."""
    PREFIX = "pf"

    def _prepare(self, pattern):
        self.pattern = os.path.normpath(pattern).lstrip(os.path.sep)  # sep at beginning is removed

    def _match(self, path):
        return path == self.pattern


# For PathPrefixPattern, FnmatchPattern and ShellPattern, we require that the pattern either match the whole path
# or an initial segment of the path up to but not including a path separator. To unify the two cases, we add a path
# separator to the end of the path before matching.


class PathPrefixPattern(PatternBase):
    """Literal files or directories listed on the command line
    for some operations (e.g. extract, but not create).
    If a directory is specified, all paths that start with that
    path match as well.  A trailing slash makes no difference.
    """
    PREFIX = "pp"

    def _prepare(self, pattern):
        sep = os.path.sep

        self.pattern = (os.path.normpath(pattern).rstrip(sep) + sep).lstrip(sep)  # sep at beginning is removed

    def _match(self, path):
        return (path + os.path.sep).startswith(self.pattern)


class FnmatchPattern(PatternBase):
    """Shell glob patterns to exclude.  A trailing slash means to
    exclude the contents of a directory, but not the directory itself.
    """
    PREFIX = "fm"

    def _prepare(self, pattern):
        if pattern.endswith(os.path.sep):
            pattern = os.path.normpath(pattern).rstrip(os.path.sep) + os.path.sep + '*' + os.path.sep
        else:
            pattern = os.path.normpath(pattern) + os.path.sep + '*'

        self.pattern = pattern.lstrip(os.path.sep)  # sep at beginning is removed

        # fnmatch and re.match both cache compiled regular expressions.
        # Nevertheless, this is about 10 times faster.
        self.regex = re.compile(fnmatch.translate(self.pattern))

    def _match(self, path):
        return (self.regex.match(path + os.path.sep) is not None)


class ShellPattern(PatternBase):
    """Shell glob patterns to exclude.  A trailing slash means to
    exclude the contents of a directory, but not the directory itself.
    """
    PREFIX = "sh"

    def _prepare(self, pattern):
        sep = os.path.sep

        if pattern.endswith(sep):
            pattern = os.path.normpath(pattern).rstrip(sep) + sep + "**" + sep + "*" + sep
        else:
            pattern = os.path.normpath(pattern) + sep + "**" + sep + "*"

        self.pattern = pattern.lstrip(sep)  # sep at beginning is removed
        self.regex = re.compile(shellpattern.translate(self.pattern))

    def _match(self, path):
        return (self.regex.match(path + os.path.sep) is not None)


class RegexPattern(PatternBase):
    """Regular expression to exclude.
    """
    PREFIX = "re"

    def _prepare(self, pattern):
        self.pattern = pattern  # sep at beginning is NOT removed
        self.regex = re.compile(pattern)

    def _match(self, path):
        # Normalize path separators
        if os.path.sep != '/':
            path = path.replace(os.path.sep, '/')

        return (self.regex.search(path) is not None)


_PATTERN_CLASSES = {
    FnmatchPattern,
    PathFullPattern,
    PathPrefixPattern,
    RegexPattern,
    ShellPattern,
}

_PATTERN_CLASS_BY_PREFIX = {i.PREFIX: i for i in _PATTERN_CLASSES}

CmdTuple = namedtuple('CmdTuple', 'val cmd')


class IECommand(Enum):
    """A command that an InclExcl file line can represent.
    """
    RootPath = 1
    PatternStyle = 2
    Include = 3
    Exclude = 4
    ExcludeNoRecurse = 5


def command_recurses_dir(cmd):
    # TODO?: raise error or return None if *cmd* is RootPath or PatternStyle
    return cmd not in [IECommand.ExcludeNoRecurse]


def get_pattern_class(prefix):
    try:
        return _PATTERN_CLASS_BY_PREFIX[prefix]
    except KeyError:
        raise ValueError(f"Unknown pattern style: {prefix}") from None


def parse_pattern(pattern, fallback=FnmatchPattern, recurse_dir=True):
    """Read pattern from string and return an instance of the appropriate implementation class.

    """
    if len(pattern) > 2 and pattern[2] == ":" and pattern[:2].isalnum():
        (style, pattern) = (pattern[:2], pattern[3:])
        cls = get_pattern_class(style)
    else:
        cls = fallback
    return cls(pattern, recurse_dir)


def parse_exclude_pattern(pattern_str, fallback=FnmatchPattern):
    """Read pattern from string and return an instance of the appropriate implementation class.
    """
    epattern_obj = parse_pattern(pattern_str, fallback, recurse_dir=False)
    return CmdTuple(epattern_obj, IECommand.ExcludeNoRecurse)


def parse_inclexcl_command(cmd_line_str, fallback=ShellPattern):
    """Read a --patterns-from command from string and return a CmdTuple object."""

    cmd_prefix_map = {
        '-': IECommand.Exclude,
        '!': IECommand.ExcludeNoRecurse,
        '+': IECommand.Include,
        'R': IECommand.RootPath,
        'r': IECommand.RootPath,
        'P': IECommand.PatternStyle,
        'p': IECommand.PatternStyle,
    }
    if not cmd_line_str:
        raise argparse.ArgumentTypeError("A pattern/command must not be empty.")

    cmd = cmd_prefix_map.get(cmd_line_str[0])
    if cmd is None:
        raise argparse.ArgumentTypeError("A pattern/command must start with any of: %s" %
                                         ', '.join(cmd_prefix_map))

    # remaining text on command-line following the command character
    remainder_str = cmd_line_str[1:].lstrip()
    if not remainder_str:
        raise argparse.ArgumentTypeError("A pattern/command must have a value part.")

    if cmd is IECommand.RootPath:
        # TODO: validate string?
        val = remainder_str
    elif cmd is IECommand.PatternStyle:
        # then remainder_str is something like 're' or 'sh'
        try:
            val = get_pattern_class(remainder_str)
        except ValueError:
            raise argparse.ArgumentTypeError(f"Invalid pattern style: {remainder_str}")
    else:
        # determine recurse_dir based on command type
        recurse_dir = command_recurses_dir(cmd)
        val = parse_pattern(remainder_str, fallback, recurse_dir)

    return CmdTuple(val, cmd)
