02 算法体系班 - 08 前缀树、桶排序与排序总结 - 《java笔记》

前缀树
桶排序
排序算法稳定性
排序算法总结

前缀树

1）单个字符串中，字符从前到后的加到一棵多叉树上
2）字符放在路上，节点上有专属的数据项（常见的是pass和end值）
3）所有样本都这样添加，如果没有路就新建，如有路就复用
4）沿途节点的pass值增加1，每个字符串结束时来到的节点end值增加1可以完成前缀相关的查询

设计一种结构。用户可以：
1）void insert(String str) 添加某个字符串，可以重复添加，每次算1个
2）int search(String str) 查询某个字符串在结构中还有几个
3) void delete(String str) 删掉某个字符串，可以重复删除，每次算1个
4）intprefixNumber(String str) 查询有多少个字符串，是以str做前缀的

08 前缀树、桶排序与排序总结 - 图1

 // 数组实现前缀树
static class Node {
    public int pass;
    public int end;
    public Node[] next;
    private static final int DEFAULT_NEXT_NUM = 26;
    public Node() {
        pass = 0;
        end = 0;
        next = new Node[DEFAULT_NEXT_NUM];
    }
}
static class TrieTree {
    private final Node root;
    public TrieTree() {
        this.root = new Node();
    }
    public void insert(String word) {
        assert Objects.nonNull(word);
        char[] chars = word.toCharArray();
        Node node = root;
        root.pass++;
        int index = 0;
        for (char aChar : chars) {
            index = aChar - 'a';
            if (node.next[index] == null) {
                node.next[index] = new Node();
            }
            node = node.next[index];
            node.pass++;
        }
        node.end++;
    }
    // 删掉某个字符串，可以重复删除，每次算1个
    public void delete(String word) {
        if (search(word) > 0) {
            root.pass--;
            Node node = root;
            char[] chars = word.toCharArray();
            int index = 0;
            for (char aChar : chars) {
                index = aChar - 'a';
                if (--node.next[index].pass == 0) {
                    node.next[index] = null;
                    return;
                }
                node = node.next[index];
            }
            node.end--;
        }
    }
    // 某个字符串在结构中还有几个
    public int search(String word) {
        assert Objects.nonNull(word);
        Node node = root;
        char[] chars = word.toCharArray();
        int index = 0;
        for (char aChar : chars) {
            index = aChar - 'a';
            if (node.pass == 0 || node.next[index] == null) {
                return 0;
            }
            node = node.next[index];
        }
        return node.end;
    }
    // 查询有多少个字符串，是以str做前缀的
    public int prefixNumber(String pre) {
        assert Objects.nonNull(pre);
        Node node = root;
        char[] chars = pre.toCharArray();
        int index = 0;
        for (char aChar : chars) {
            index = aChar - 'a';
            if (node.next[index] == null) {
                return 0;
            }
            node = node.next[index];
        }
        return node.pass;
    }
}

//HashMap实现前缀树
static class Node {
    private int pass;
    private int end;
    private HashMap<Integer, Node> next;

    public Node() {
        pass = 0;
        end = 0;
        next = new HashMap<>(26);
    }
}

static class TrieTree {
    private Node root;

    public TrieTree() {
        root = new Node();
    }

    public void insert(String word) {
        assert Objects.nonNull(word);
        root.pass++;
        Node node = root;
        char[] chars = word.toCharArray();
        int index = 0;
        for (char aChar : chars) {
            index = aChar - 'a';
            if (!node.next.containsKey(index)) {
                node.next.put(index, new Node());
            }
            node = node.next.get(index);
            node.pass++;
        }
        node.end++;
    }

    // 删掉某个字符串，可以重复删除，每次算1个
    public void delete(String word) {
        if (search(word) > 0) {
            root.pass--;
            Node node = root;
            char[] chars = word.toCharArray();
            int index = 0;
            for (char aChar : chars) {
                index = aChar - 'a';
                if (--node.next.get(index).pass == 0) {
                    node.next.remove(index);
                    return;
                }
                node = node.next.get(index);
            }
            node.end--;
        }
    }

    // 某个字符串在结构中还有几个
    public int search(String word) {
        assert word != null;
        Node node = root;
        char[] chars = word.toCharArray();
        int index = 0;
        for (char aChar : chars) {
            index = aChar - 'a';
            if (node.pass == 0 || !node.next.containsKey(index)) {
                return 0;
            }
            node = node.next.get(index);
        }
        return node.end;
    }

    // 查询有多少个字符串，是以str做前缀的
    public int prefixNumber(String pre) {
        assert pre != null;
        Node node = root;
        char[] chars = pre.toCharArray();
        int index = 0;
        for (char aChar : chars) {
            index = aChar - 'a';
            if (node.next.get(index) == null) {
                return 0;
            }
            node = node.next.get(index);
        }
        return node.pass;
    }
}

08 前缀树、桶排序与排序总结 - 图2

static class TrieTreeComparator {
    private HashMap<String, Integer> hashMap;

    public TrieTreeComparator() {
        hashMap = new HashMap<>();
    }

    public void insert(String word) {
        if (!hashMap.containsKey(word)) {
            hashMap.put(word, 1);
        } else {
            hashMap.put(word, hashMap.get(word) + 1);
        }
    }

    public void delete(String word) {
        if (hashMap.containsKey(word)) {
            if (hashMap.get(word) == 1) {
                hashMap.remove(word);
            } else {
                hashMap.put(word, hashMap.get(word) - 1);
            }
        }
    }

    public int search(String word) {
        return hashMap.getOrDefault(word, 0);
    }

    public int prefixNumber(String pre) {
        int count = 0;
        for (String word : hashMap.keySet()) {
            if (word.startsWith(pre)) {
                count += hashMap.get(word);
            }
        }
        return count;
    }
}

桶排序

桶排序思想下的排序：计数排序 & 基数排序
1)桶排序思想下的排序都是不基于比较的排序
2)时间复杂度为O(N)，额外空间负载度O(M)
3)应用范围有限，需要样本的数据状况满足桶的划分

08 前缀树、桶排序与排序总结 - 图3
08 前缀树、桶排序与排序总结 - 图4

// only for 0~200 value   
// 计数排序要求数据最大值不能太大，否则浪费太多空间，影响时间复杂度

public static void countSort(int[] arr) {
    if (arr == null || arr.length < 2) {
        return;
    }
    int max = Integer.MIN_VALUE;
    for (int j : arr) {
        max = Math.max(max, j);
    }

    int[] bucket = new int[max + 1];

    for (int k : arr) {
        bucket[k]++;
    }

    int i = 0;
    for (int j = 0; j < bucket.length; j++) {
        while (bucket[j]-- > 0) {
            arr[i++] = j;
        }
    }
}

08 前缀树、桶排序与排序总结 - 图5

// only for no-negative value
public static void radixSort(int[] arr) {
    if (arr == null || arr.length < 2) {
        return;
    }
    int max = Integer.MIN_VALUE;
    for (int j : arr) {
        max = Math.max(max, j);
    }
    int digitMaxBit = 0;
    while (max != 0) {
        digitMaxBit++;
        max /= 10;
    }
    radixSort(arr, 0, arr.length - 1, digitMaxBit);
}


public static void radixSort(int[] arr, int left, int right, int digitMaxBit) {
    final int radix = 10;
    int[] help = new int[right - left + 1];
    // 0-9位

    for (int d = 1; d <= digitMaxBit; d++) {
        int[] count = new int[radix];
        for (int j = left; j <= right; j++) {
            int k = getDigit(arr[j], d);
            count[k]++;
        }
        // 转换为前缀和数组
        for (int i = 1; i < radix; i++) {
            count[i] = count[i] + count[i - 1];
        }
        for (int i = right; i >= left; i--) {
            int k = getDigit(arr[i], d);
            help[count[k] - 1] = arr[i];
            count[k]--;
        }
        for (int i = left, j = 0; i <= right; i++, j++) {
            arr[i] = help[j];
        }
    }
}

public static int getDigit(int x, int d) {
    // 获取每位上的值0~9
    return (x / (int) Math.pow(10, d - 1)) % 10;
}

排序算法稳定性

稳定性是指同样大小的样本再排序之后不会改变相对次序
对基础类型来说，稳定性毫无意义
对非基础类型来说，稳定性有重要意义
有些排序算法可以实现成稳定的，而有些排序算法无论如何都实现不成稳定的

排序算法总结

算法	时间复杂度	额外空间复杂度	稳定性
选择排序	O(N^2)	O(1)	无
冒泡排序	O(N^2)	O(1)	有
插入排序	O(N^2)	O(1)	有
归并排序	*O(NlogN)**	O(N)	有
快速排序	*O(NlogN)**	O(logN)	无
堆排序	*O(NlogN)**	O(1)	无
计数排序	O(N)	O(M）	有
基数排序	O(N)	O(N)	有

1）不基于比较的排序，对样本数据有严格要求，不易改写
2）基于比较的排序，只要规定好两个样本怎么比大小就可以直接复用
3）基于比较的排序，时间复杂度的极限是O(NlogN)
4）时间复杂度O(NlogN)、额外空间复杂度低于O(N)、且稳定的基于比较的排序是不存在的。
5）为了绝对的速度选快排、为了省空间选堆排、为了稳定性选归并

工程上对排序的改进：
1）稳定性考虑
2）充分利用O(N*logN)和O(N^2）排序的各自的优势