第二章 数据结构

数据结构是大一下的课程，总体学的还好，就当复习一下，以及看看有没有什么其他内容。

链表与邻接表

数组模拟单链表

使用两个数组e[N]和nxt[N]，使用下标关联起来。其他操作与正常链表一样。

数组模拟双链表

使用三个数组e[N]、l[N]和r[N]。

初始化：

r[0] = 1;
l[1] = 0;

单调栈

给定一个序列，求一个序列当中每个数左边离他最近的且比他小的数在什么地方。

#include <iostream>
using namespace std;
const int N = 100010;

int n;
int stk[N], tt;

int main(){
    cin >> n;
    for(int i = 0; i < n; i++){
        int x;
        cin >> x;
        while(tt && stk[tt] >= x) tt --;
        if(tt) cout << stk[tt] << endl;
        else cout << -1 << ' ';
        stk[++tt] = x;
    }
    return 0;
}

单调队列

滑动窗口：

#include <iostream>
using namespcae std;

coust int N = 1000010;

int n,k;
int a[N], q[N];

int main(){
    scanf("%d",&n);
    for(int i = 0; i < n; i++) scanf("%d",&a[i]);
    int hh = 0, tt = -1;
    for(int i = 0; i < n ; i++){
        if(hh <= tt && i - k + 1 > q[hh]) hh++;
        while (hh <= tt && a[q[tt]] >= a[i]) tt--;
        q[++tt] = i;
        if(i >= k - 1) printf("%d ",a[q[hh]]);
    }
    puts("");
    return 0;
}

KMP算法

定义KMP数组next[i] = j，即以i为终点的一段的数组里，前面j个元素和最后面j个元素是一样的。

匹配的时候，当匹配到key数组的第k位时，使用next[k]判断这一段数组有多少是相同的，把key数组向后移动strlen(key) - next[k]位，得到新的验证数组位置，就是把验证数组指针指向next[k]，而指向匹配数组的指针只用每个循环加1。

#include <iostream>
using namespace std;

const int N = 100010, M = 100010;

int n,m;
int p[N], s[M];
int nxt[N];

int main(){
    cin >> n >> p+1 >> m >> s+1;

    //求next的过程
    for(int i = 2, j = 0; i <= n;i++){
        while(j && p[i]!=p[j+1]) j = ne[j];
        if(p[i] == p[j+1]) j++;
        ne[i] = j;
    }

    //KMP匹配算法
    for(int i = 1, j = 0; i<=m; i++){
        while (j && s[i]!=p[j+1]) j = nxt[j]; //匹配不上使用next[j]计算前后缀重复区间大小，一直位移到可以匹配上。
        if(s[i] == p[j+1]) j++;
        if(j==n){
            //匹配成功！
        }
    }
}

Trie树

用来快速存储和查找字符串集合的数据结构，

每个节点一个字符，每个位置标记是否为结尾。这里使用数组存储Trie树，每一个位置是一个节点，使用son数组记录每个节点的子节点位置。

示例题目：

#include <iostream>
using namespace std;
const int N = 100010;

int n;
int son[N][26], cnt[N] , idx; //idx代表目前数组开到哪里，下标为0的为空节点。son数组记录的是下标为n的节点的子节点，cnt记录某个字符串结尾的次数，初始是0。
char str[N];

void insert(char str[]){
    int p = 0;
    for(int i = 0; str[i]; i++){
        int u = str[i] - 'a';
        if(!son[p][u]) son[p][u] = ++idx;
        p = son[p][u];
    }
    cnt[p] ++;
}

int query(char str[]){
    int p = 0;
    for(int i = 0; str[i]; i++){
        int u = str[i] - 'a';
        if(!son[p][u]) return 0;
        p = son[p][u];
    }
    return cnt[p];
}

int main(){
    int n;
    scanf("%d",&n);
    while(n--){
        char op[2];//读成字符串避免scanf读字符出现空格回车
        scanf("%s%s",op,str);
        if(op[0]=='I') insert(str);
        else query(str);
    }
    return 0;
}

并查集

并查集支持以下操作且可以近乎$O(1)$快速完成以下操作：1️⃣将两个集合合并；2️⃣询问两个元素是否在一个集合当中。

基本原理：使用树维护集合，每棵树是一个集合，每一个集合有一个元素作为根节点，树根的编号就是整个集合的编号，每一个节点存储一个它的父节点，p[x]表示x的父节点。根节点p[x]=x即集合编号。

问题一：如何判断树根，父节点指向自己：if(p[x]==x)

问题二：如何求x的集合编号，通过记录的父节点一直往上找：while(p[x] != x) x = p[x];

问题三：如何合并两个集合：假设p[x]的x的集合编号，p[y]是y的集合编号，直接p[x] = y即可。

问题四：问题二的时间复杂度比较高，可以进行路径优化：查找x的根节点的时候，可以把x和所有路径上的节点都直接指向根节点，优化下一次查找的时间复杂度。

示例题目：

#include <iostream>
using namespace std;
const int N = 100010;

int n;
int p[N];//记录每个元素的父节点是谁

int find(int x){ //带路径压缩优化，使用递归调用
    if(p[x]!=x) p[x] = find(p[x]);
    return p[x];
}

int main(){
    scanf("%d%d",&n,&m);
    for(int i = 1; i <=n ; i++) p[i] = i; //最开始每个元素在一个集合
    while(m--){
        char op[2];
        int a,b;
        scanf("%s%d%d",op,&a,&b);
        if(op[0]=='M') p[find(a)] = find(b);
        else{
            if(find(a) == find(b)) puts("Yes");
            else puts("No");
        }
    }
    return 0;
}

堆

如何手写一个堆？

插入一个数 heap[++size]; up(size);
求这个集合之中的最小值 heap[1]
删除最小值 heap[1] = heap[size]; size--; down(1);
删除任何一个元素 heap[k] = heap[size]; size--; down(k); up(k);
修改任何一个元素 heap[k] = x; down(x); up(x);

定义：堆是一个完全二叉树。小根堆递归定义：每个节点小于等于左右儿子，即根节点就是最小值。

存储：假设根节点是$1$，则x的左儿子是$2x$，x的右儿子是$2x+1$。

堆排序

其实利用的是求集合最小值和删除最小值两个操作，所以只需要完成down操作

#include <iostream>
#include <algorithm>
using namespace std;

const int N = 100010;

int n, m;
int h[N], size;

void down(int u){
     int t = u;
    if(u*2<=size && h[u*2]<h[t]) t = u*2;
    if(u*2+1<=size && h[u*2+1]<h[t]) t = u*2+1;
    if(u!=t){
        swap(h[u],h[t]);
        down(t);
    }
}

void up(int u){
    while(u/2 && h[u/2] > h[u]){
        swap(h[u/2],h[u]);
        u
    }
}

int main(){
    scanf("%d%d",&n,&m);
    for(int i = 1; i <= n; i++) scanf("%d", &h[i]);
    size = n;

    for(int i = n/2; i; i--) down(i); //建堆，不需要down最后一层

    while(m--){
        printf("%d",&h[1]);
        h[1] = h[size];
        size--;
        down(1);
    }
    return 0;
}

哈希表

存储结构：开放寻址法、拉链法

字符串哈希方式：把每一种字符映射成数字，使用以下公式计算：
$$
ABC…A = \
(1\times p^{n-1} + 2\times p^{n-2} + 3\times p^{n-3} +…1\times p^0)\ mod\ Q
$$
其中可以 $p=131或13331$ ， $Q = 2^{64}$ ，可以降低重复概率。 $Q = 2^{64}$ 可以直接使用unsigned long long直接存储。

可以把字符前缀哈希和记为h[i]，比如$(L+1)\ -\ R$这一段的哈希值为 $h[R]-h[L]\times p^{R-L+1}$ ，以及递推 $h[i] = h[i-1]\times p + str(i)$ 。

例题：

#include <iostream>
using namespace std;

typedef unsigned long long ULL;
const int N = 100010, P = 131;

int n,m;
char str[N];
ULL h[N],p[N];

ULL get(int l, int r){
    return h[r] - h[l-1]*p[r-l+1];
}

int main(){
    scanf("%d%d%s", &n, &m, str+1);
    p[0] = 1;
    for(int i = 1; i <= n; i++){
        p[i] = p[i - 1]*P;
        h[i] = h[i - 1]*P + str[i];
    }
    while(m--){
        int l1,r1,l2,r2;
        scanf("%d%d%d%d", &l1,&r1,&l2,&r2);
        if(get(l1,r1) == get(l2,r2)) puts("Yes");
        else puts("No");
    }
    return 0;
}

STL常用容器

vector

数组长度可以动态变化，倍增的思想

#include <vector>
using namespace std;

int main(){
    vector<int> a(10,3);//定义一个长度为10的int类型vector，每一位都是3
    a.size(); //返回元素个数，所有容器都有
    a.empty(); //返回是否为空，所有容器都有
    a.clear(); //清空
    a.front();a.back();//返回第一个数/最后一个数
    a.push_back();a.pop_back();//向最后插入/删除一个数
    a.begin();a.end();//第一个数/最后一个数的后面一个数
    a[];// 类似于数组调用
    vector<int> a(4,3), b(3,4);
    if(a < b) puts("a < b"); //支持比较运算，使用“字典序”
}

pair

pair<int, int>，使用p.first或p.second获取第一、第二个元素。

支持比较运算，以first为第一关键字，以second为第二关键字。

多层嵌套：pair<int, pair<int, int>>

string

字符串，substr()返回子串，c_str()返回字符数组起始地址

string a = "lxzs";
a += "nb"; //可以直接增加
a.size();
a.empty();
a.clear;
a.substr(1,2);//返回子串，第一个关键字是起始位置，第二个是子串长度
printf("%s\n", a.c_str());

queue

队列，push()，front()，pop()，有empty、size函数，没有clear函数。

a.push();//队尾插入
a.front();//返回队头元素
a.back();//返回队尾元素
a.pop();//弹出队头元素

priority_queue

优先队列，push()，top()，pop()，默认大根堆

a.push();//堆中插入
a.top();//返回堆顶元素
a.pop();//弹出堆顶元素

如果想使用小根堆，第一种可以直接所有元素添加负号。或者如下：

priority_queue<int, vector<int>, greater<int>> q;

stack

栈，push()，top()，pop()。跟queue差不多。

deque

双端队列。有empty、size、clear函数。效率较低。

a.front();//返回队头元素
a.back();//返回队尾元素
a.push_back();a.pop_back(); //队尾插入、弹出一个元素
a.push_front();a.pop_back(); //队头插入、弹出
a[];
a.begin();a.end();

set, map, multiset, multimap

基于平衡二叉树（红黑树），动态维护有序序列。

size();
empty();
clear();

set/multiset:
    insert(); //插入一个数
    find(); //查找一个数
    count(); //返回某一个数的个数
    erase();
        //(1) 输入是一个数x，删除所有值为x的数，O(log(n));
        //(2) 输入是一个迭代器，删除这个迭代器
    lower_bound;upper_bound;//第一个是返回大于等于x的最小的数，第二个返回大于x的最小的数。（都是返回迭代器）

map/multimap:
    insert();//插入的数是一个pair
    erase;//输入的参数是pair或者迭代器
    find();
    a[];

//eg.
map<string,int> a;
a["lx"] = 1;
cout << a["lx"] << endl; //时间复杂度O(log(n))

unordered_set, unordered_map, unordered_multiset, unordered_multimap

哈希表

和上面类似，增删改查的时间复杂度O(1) 。

不支持lower_bound()和upper_bound()、迭代器的++。

bitset

压位。比如要开1024大小的bool数组，需要1024字节空间。但是可以用bitset存成128字节，省8倍空间。

bitset<10000> S;
//支持所有的位运算操作
~, &, |, ^
>>, <<
==, !=
S[]

count(); //返回有多少个1
any(); //判断是否至少有一个为1
none(); //判断是否全为0
set(); //把所有位置成1
set(k, v); //把第k位变成v
reset(); //把所有位变成0
flip(); //等价于~
flip(k); //把第k位取反

来自北京

Diary of LX

第二章数据结构