LeetCode 八周 - 数据结构专题 - 《算法学习笔记》

https://leetcode-cn.com/problems/two-sum/">两数之和 https://leetcode-cn.com/problems/two-sum/
https://leetcode-cn.com/problems/repeated-dna-sequences/">重复的DNA序列 https://leetcode-cn.com/problems/repeated-dna-sequences/
https://leetcode-cn.com/problems/design-hashmap/">设计哈希映射 https://leetcode-cn.com/problems/design-hashmap/
https://leetcode-cn.com/problems/find-duplicate-subtrees/">寻找重复的子树 https://leetcode-cn.com/problems/find-duplicate-subtrees/
https://leetcode-cn.com/problems/subarray-sum-equals-k/">和为K的子数组 https://leetcode-cn.com/problems/subarray-sum-equals-k/
https://leetcode-cn.com/problems/continuous-subarray-sum/">连续的子数组和 https://leetcode-cn.com/problems/continuous-subarray-sum/
https://leetcode-cn.com/problems/friend-circles/">朋友圈 https://leetcode-cn.com/problems/friend-circles/
https://leetcode-cn.com/problems/redundant-connection/">冗余连接 https://leetcode-cn.com/problems/redundant-connection/
https://leetcode-cn.com/problems/top-k-frequent-words/">前K个高频单词 https://leetcode-cn.com/problems/top-k-frequent-words/
https://leetcode-cn.com/problems/find-median-from-data-stream/">数据流的中位数 https://leetcode-cn.com/problems/find-median-from-data-stream/
https://leetcode-cn.com/problems/data-stream-as-disjoint-intervals/">将数据流变为多个不相交区间 https://leetcode-cn.com/problems/data-stream-as-disjoint-intervals/
总结

数据结构
平衡树堆

两数之和 https://leetcode-cn.com/problems/two-sum/

class Solution {
public:
    vector<int> twoSum(vector<int>& nums, int target) {
        vector<pair<int, int>> vec;
        for (int i = 0; i < nums.size(); i++) vec.push_back({nums[i], i});
        sort(vec.begin(), vec.end(), [](auto a, auto b) {return a.first < b.first;});
        int n = vec.size();
        int i = 0, j = n - 1;
        while (i != j) {
            if (vec[i].first + vec[j].first < target) i++;
            else if (vec[i].first + vec[j].first == target) return vector<int>{vec[i].second, vec[j].second};
            else j--;
        }
        return vector<int>{-1, -1};
    }
};

遍历每个数时，在 hash表里找 target - a[i] 是否存在，之后把 a[i] 放进 hash表
O(N)

重复的DNA序列 https://leetcode-cn.com/problems/repeated-dna-sequences/

class Solution {
public:
    typedef unsigned long long ull;
    vector<string> findRepeatedDnaSequences(string s) {
        unordered_map<string, int> m;
        int n = s.size();
        for (int i = 0; i < n; i++) {
            if (i + 9 >= n) break;
            m[s.substr(i, 10)]++;
        }
        vector<string> ans;
        for (auto p : m) {
            if (p.second > 1) ans.push_back(p.first);
        }
        return ans;
    }
};

把每个长度为 10 的子串塞进 hash表，最后判一下出现次数

class Solution {
public:
    typedef unsigned long long ull;
    vector<string> findRepeatedDnaSequences(string s) {
        int n = s.size();
        vector<ull> vec(n + 1), f(n + 1, 1);
        for (int i = 0; i < n; i++) {
            vec[i + 1] = vec[i] + (s[i] - 'A' + 1) * f[i];
            f[i + 1] = f[i] * 131;
        }
        for (int i = 0; i < vec.size(); i++) cout << vec[i] << " ";
        cout << endl;
        unordered_map<ull, pair<int, int>> m;
        for (int i = 0; i < n; i++) {
            if (i + 9 >= n) break;
            ull val = (vec[i + 10] - vec[i]) * f[n - i - 10];
            if (m.count(val)) m[val].second++;
            else m[val] = {i, 1};
        }
        vector<string> ans;
        for (auto p : m) {
            if (p.second.second > 1) {
                cout << p.second.first << " " << p.second.second << endl;
                ans.push_back(s.substr(p.second.first, 10));
            }
        }
        return ans;
    }
};

做法二，用 O(N) 的时间做预处理，然后遍历一遍，手动 hash，比默认的会快一点
有个技巧，如何比较两个 hash值是否相同，直接乘到最高位即可

设计哈希映射 https://leetcode-cn.com/problems/design-hashmap/

class MyHashMap {
public:
    /** Initialize your data structure here. */
    vector<int> h_key, h_value;
    int N;
    MyHashMap() {
        h_key = h_value = decltype(h_key)(20000 + 11, -1);
        N = h_key.size();
    }
    int find(int key) {
        int pos = key % N;
        while (h_key[pos] != key && h_key[pos] != -1) {
            pos++;
            if (pos == N) pos = 0;
        }
        return pos;
    }

    /** value will always be non-negative. */
    void put(int key, int value) {
        int pos = find(key);
        h_key[pos] = key;
        h_value[pos] = value;
    }

    /** Returns the value to which the specified key is mapped, or -1 if this map contains no mapping for the key */
    int get(int key) {
        int pos = find(key);
        return h_value[pos];
    }

    /** Removes the mapping of the specified value key if this map contains a mapping for the key */
    void remove(int key) {
        int pos = find(key);
        h_key[pos] = -2;
        h_value[pos] = -1;
    }
};
/**
 * Your MyHashMap object will be instantiated and called as such:
 * MyHashMap* obj = new MyHashMap();
 * obj->put(key,value);
 * int param_2 = obj->get(key);
 * obj->remove(key);
 */

注意，删除的时候弄成 -2，防止跟后面的断了

寻找重复的子树 https://leetcode-cn.com/problems/find-duplicate-subtrees/

/**
 * Definition for a binary tree node.
 * struct TreeNode {
 *     int val;
 *     TreeNode *left;
 *     TreeNode *right;
 *     TreeNode(int x) : val(x), left(NULL), right(NULL) {}
 * };
 */
class Solution {
public:

    unordered_map<string, int> str_int;
    unordered_map<int, int> str_cnt;
    int cnt = 0;
    vector<TreeNode*> ans;
    vector<TreeNode*> findDuplicateSubtrees(TreeNode* root) {
        dfs(root);
        return ans;
    }
    string dfs(TreeNode *root) {
        if (!root) return to_string(0);

        auto left = dfs(root->left);
        auto right = dfs(root->right);
        auto root_str = to_string(root->val) + ',' + left + ',' + right;
        if (!str_int.count(root_str)) str_int[root_str] = ++cnt;
        int t = str_int[root_str];
        str_cnt[t]++;
        if (str_cnt[t] == 2) ans.push_back(root);
        return to_string(t);
    }
};

利用前序遍历，把树转成字符串，但由于 hash字符串是O(N) 的，可以把字符串再 hash 成一个数

和为K的子数组 https://leetcode-cn.com/problems/subarray-sum-equals-k/

class Solution {
public:
    int subarraySum(vector<int>& nums, int k) {
        int n = nums.size();
        int sum = 0;
        unordered_map<int, int> hash;
        hash[0] = 1;
        int ans = 0;
        for (int i = 0; i < n; i++) {
            sum += nums[i];
            if (hash.count(sum - k)) ans += hash[sum - k];
            hash[sum]++;
        }
        return ans;
    }
};

利用前缀和，实现两个值求序列和，再利用 hash表存放历史前缀和
数据结构专题 - 图1
所以代码里是那样

连续的子数组和 https://leetcode-cn.com/problems/continuous-subarray-sum/

class Solution {
public:
    bool checkSubarraySum(vector<int>& nums, int k) {
        unordered_map<int, int> m;
        m[0] = -1;
        int sum = 0;
        for (int i = 0; i < nums.size(); i++) {
            sum += nums[i];
            if (k) sum %= k;
            if (m.count(sum) && i - m[sum] > 1) return true;
            if (!m.count(sum)) m[sum] = i;
        }
        return false;
    }
};

和上题类似，不过这里有个公式
数据结构专题 - 图2
然后利用 hash_map，value 存下标，key 存余数
对每个数遍历，看是否能找到余数相同的，并且下标差大于1
注意给 m[0] = -1，因为怕出现 0 0 这种情况

朋友圈 https://leetcode-cn.com/problems/friend-circles/

class Solution {
public:
    int a[200 + 5];
    int find(int i) {
        return i == a[i] ? i : a[i] = find(a[i]);
    }
    void join(int x, int y) {
        a[find(x)] = find(y);
    }
    int findCircleNum(vector<vector<int>>& M) {
        for (int i = 0; i < M.size(); i++) {
            a[i] = i;
        }
        for (int i = 0; i < M.size(); i++)
            for (int j = 0; j < M.size(); j++)
                if (M[i][j] == 1) join(i, j);

        int ans = 0;
        for (int i = 0; i < M.size(); i++)
            if (a[i] == i) ans++;
        return ans;
    }
};

并查集的模板题

冗余连接 https://leetcode-cn.com/problems/redundant-connection/

class Solution {
public:
    vector<int> p;
    int find(int x) {
        return x == p[x] ? x : p[x] = find(p[x]);
    }
    vector<int> findRedundantConnection(vector<vector<int>>& edges) {
        int n = edges.size();
        p = vector<int>(n + 1);
        for (int i = 1; i <= n; i++) p[i] = i;
        for (int i = 0; i < n; i++) {
            int from = edges[i][0];
            int to = edges[i][1];
            if (find(from) == find(to)) return {from, to};
            p[find(from)] = p[find(to)];
        }
        return {-1, -1};
    }
};

模板题，遍历图的时候，并查集合并，如果说两个节点对应一个父亲，说明这两个点已经是通的了，所以可以去掉这条边

前K个高频单词 https://leetcode-cn.com/problems/top-k-frequent-words/

class Solution {
public:
    vector<string> topKFrequent(vector<string>& words, int k) {
        unordered_map<string, int> m;
        for (auto str : words) m[str]++;
        priority_queue<pair<int, string>> q;
        for (auto m_item : m) {
            q.push({-m_item.second, m_item.first});
            if (q.size() > k) q.pop();
        }
        vector<string> ans(k);
        for (int i = k - 1; i >= 0; i--)
            ans[i] = q.top().second, q.pop();
        return ans;
    }
};

这里用的最小堆，保存 k 个出现次数最多的单词
这里需要注意，用的pair，而没自定义
pair 会比较第一个关键字，如果相等比较第二个关键字
greeter 只会比较 T 里的第一个属性
因为这里还需要对字符串排序，所以用pair
还有一个技巧，无脑 push，有 k+1个时 pop 一个
利用负数使其变成小顶堆

数据流的中位数 https://leetcode-cn.com/problems/find-median-from-data-stream/

class MedianFinder {
public:
    /** initialize your data structure here. */
    priority_queue<int> big, small;
    MedianFinder() {
    }

    void addNum(int num) {
        if (small.empty() || num >= small.top()) big.push(-num);
        else {
            small.push(num);
            big.push(-small.top());
            small.pop();
        }
        if (big.size() > small.size() + 1) {
            small.push(-big.top());
            big.pop();
        }
    }

    double findMedian() {
        if (big.size() + small.size() & 1) return -big.top();
        return (-big.top() + small.top()) / 2.0;
    }
};
/**
 * Your MedianFinder object will be instantiated and called as such:
 * MedianFinder* obj = new MedianFinder();
 * obj->addNum(num);
 * double param_2 = obj->findMedian();
 */

对顶堆，尽量让 big 堆去加
big堆加不了的时候，让small 堆加完弹一个给 big 堆，注意一下，保证 big堆最多也就比 small 堆多一个，否则要弹给 small堆

将数据流变为多个不相交区间 https://leetcode-cn.com/problems/data-stream-as-disjoint-intervals/

class SummaryRanges {
public:
    /** Initialize your data structure here. */
    map<int, int> L, R;
    SummaryRanges() {}

    void addNum(int x) {
        if (R.size()) {
            auto it = R.lower_bound(x);
            if (it != R.end() && it->second <= x) return;
        }
        int r = R.count(x - 1), l = L.count(x + 1);
        if (l && r) {
            R[L[x + 1]] = R[x - 1];
            L[R[x - 1]] = L[x + 1];
            L.erase(x + 1);
            R.erase(x - 1);
        } else if (l) {
            R[L[x + 1]] = x;
            L[x] = L[x + 1];
            L.erase(x + 1);
        } else if (r) {
            L[R[x - 1]] = x;
            R[x] = R[x - 1];
            R.erase(x - 1);
        } else {
            L[x] = R[x] = x;
        }
    }

    vector<vector<int>> getIntervals() {
        vector<vector<int>> ans;
        for (auto p : L) {
            ans.push_back({p.first, p.second});
        }
        return ans;
    }
};
/**
 * Your SummaryRanges object will be instantiated and called as such:
 * SummaryRanges* obj = new SummaryRanges();
 * obj->addNum(val);
 * vector<vector<int>> param_2 = obj->getIntervals();
 */

主要一个思想，线段怎么表示？
用两个 map，L[x] 保存以 x 为左端点的右端点，R[x] 保存以 x 为右端点的左断点
对于任何一个数x

以 x-1 为右端点
以 x+1 为左端点

如果满足第一种情况，则可以合并左边那条线段 L[R[x - 1]] = x;
满足第二种情况，则可以合并右边那条线段 R[L[x + 1]] = x;
如果都满足，合并左右的 L[R[x - 1]] = L[x + 1], R[L[x + 1]] = R[x - 1]
如果都不满足，L[x] = R[x] = x;
还要判断一下，x 是否插过
R.lower_bound(x) 找到的是以 x或x右边为右端点的线段，判断其左端点是否 ≤ x，即可判断

class SummaryRanges {
public:
    /** Initialize your data structure here. */
    map<int, int> l, r;
    SummaryRanges() {
    }

    void addNum(int x) {
        if (l.size()) {
            auto it = l.lower_bound(x);
            if (it != l.end() && it->second <= x) return;
        }
        int okl = l.count(x - 1), okr = r.count(x + 1);
        if (okl && okr) {
            r[l[x - 1]] = r[x + 1];
            l[r[x + 1]] = l[x - 1];
            l.erase(x - 1);
            r.erase(x + 1);
        } else if (okl) {
            r[l[x - 1]] = x;
            l[x] = l[x - 1];
            l.erase(x - 1);
        } else if (okr) {
            l[r[x + 1]] = x;
            r[x] = r[x + 1];
            r.erase(x + 1);
        } else {
            l[x] = r[x] = x;
        }
    }

    vector<vector<int>> getIntervals() {
        vector<vector<int>> ans;
        for (auto p : r) {
            ans.push_back({p.first, p.second});
        }
        return ans;
    }
};
/**
 * Your SummaryRanges object will be instantiated and called as such:
 * SummaryRanges* obj = new SummaryRanges();
 * obj->addNum(val);
 * vector<vector<int>> param_2 = obj->getIntervals();
 */

另外一种理解
L[x] 看成以 x为右端点的左端点
R[x] 看成以 x为左端点的右端点

总结

各种数据结构的骚操作
利用两个 map 表示线段，之前有题就是类似的，叫区间覆盖。