阿里算法工程师代码笔试第一题标准字典树解法

#coding:utf-8

import collections


class TrieNode(object):
    def __init__(self):
        self.children = collections.defaultdict(TrieNode)
        self.is_word = False

# 字典树的数据结构
class WordDictionary(object):
    def __init__(self):
        """
        Initialize your data structure here.
        """
        self.root = TrieNode()
        self.res = False

    def addWord(self, word):
        """
        Adds a word into the data structure.
        :type word: str
        :rtype: void
        """
        current = self.root
        for letter in word:
            current = current.children[letter]
        current.is_word = True

    def search(self, word):
        """
        Returns if the word is in the data structure. A word could contain the dot character '.' to represent any one letter.
        :type word: str
        :rtype: bool
        """
        current = self.root
        self.res = False
        self.dfs(current, word)
        return self.res

    def dfs(self, node, word):
        if not word:
            if node.is_word:
                self.res = True
            return

        if word[0] == ".":
            for n in node.children.values():
                self.dfs(n, word[1:])
        else:
            node = node.children.get(word[0])
            if not node:
                return
            self.dfs(node, word[1:])


def match(entities_line, query_line):
    entities = entities_line.split(",")

    search_tree_dict = dict()
    # 对每个实体分别构建一个 字典树
    for entity in entities:
        entity_name, values = entity.split("_")
        values = values.split("|")
        word_dictionary = WordDictionary()
        for value in values:
            word_dictionary.addWord(value)
        search_tree_dict[entity_name] = word_dictionary

    # 对每个可能的词语都遍历一遍，进行搜索，并保存其实体名字和其右边界
    search_result = dict()
    for i in range(len(query_line)):
        for j in range(i+1, len(query_line) + 1):
            word = query_line[i:j]
            for entity_name, word_dictionary in search_tree_dict.items():
                if word_dictionary.search(word):
                    if i not in search_result:
                        search_result[i] = []
                        search_result[i].append((j, entity_name))
                    else:
                        # 如果已有一个结果，且右边界相同，则继续添加。
                        # 如 添加singer之后，添加actor
                        if search_result[i][0][0] == j:
                            search_result[i].append((j, entity_name))
                        else:
                            # 因为是从左到右遍历的，所以新的匹配j肯定大于原来的j
                            # 所以清空原结果，新添加即可
                            search_result[i] = []
                            search_result[i].append((j, entity_name))

    output_string_list = []
    i = 0
    # 构造输出结果即可
    while i < len(query_line):
        if i in search_result:
            # 如果在匹配结果里，就把结果取出
            output_string_list.append(" ")
            output_string_list.append(query_line[i: search_result[i][0][0]])
            output_string_list.append("/")
            output_string_list.append(",".join([match_result[1] for match_result in search_result[i]]))
            output_string_list.append(" ")
            i = search_result[i][0][0]
        else:
            # 不在，则直接添加原字符
            output_string_list.append(query_line[i])
            i += 1

    output_string = "".join(output_string_list)

    return output_string


if __name__ == '__main__':
    # entities_line = "singer_周杰|周杰伦,song_七里香|稻香,actor_周杰伦|孙俪"
    # query = "请播放周杰伦的七里香给我听"
    entities_line = input()
    query = input()
    print(match(entities_line, query))

#算法工程师##阿里巴巴##笔试题目#