Day27

Day27

今天讲的是 KMP + hash算法
好好学习!!!
OJ

PS:

这次的题基本上都是用KMP写的,hash算法的写法等理解好了再补

KMP算法解释

详情注意一下链接
前后缀数组的理解
KMP解释
KMP白话讲解
KMP生动一点的讲解

贴一个板子

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
#include<bits/stdc++.h>

using namespace std;
string s,t;
int n,m;
// Computes the prefix function of `s`.
//
// The returned vector has s.length() entries; entry i is the length of the
// longest proper prefix of s[0..i] that is also a suffix of s[0..i].
// An empty input yields an empty vector.
//
// The string is taken by const reference so that callers passing a long
// concatenated text do not pay for a full copy on every call.
vector<int> cal_nxt(const string& s){
    const int n = (int)s.length();
    vector<int> nxt(n);
    for (int i = 1; i < n; i++) {
        // Start from the border of the previous prefix and shrink it until
        // it can be extended by s[i] (or until it is empty).
        int j = nxt[i-1];
        while (j > 0 && s[i] != s[j]) j = nxt[j-1];
        if (s[i] == s[j]) j++;
        nxt[i] = j;
    }
    return nxt;
}
// Reads the text `s` and then the pattern `t` from standard input and prints
// "YES" if `t` occurs in `s`, otherwise "NO".
//
// The pattern and text are joined as t + '#' + s; a prefix-function value equal
// to the pattern length inside the text part marks an occurrence.
// NOTE(review): this presumably relies on '#' not appearing in the inputs.
int main(){
    cin >> s >> t;
    n = s.length();
    m = t.length();
    const string joined = t + '#' + s;
    const vector<int> border = cal_nxt(joined);
    bool found = false;
    // Positions 0..m belong to the pattern and the separator; scan the rest.
    for (int pos = m + 1; pos < (int)joined.length() && !found; ++pos)
        found = (border[pos] == m);
    cout << (found ? "YES" : "NO") << endl;
    return 0;
}


//另一个板子
string start,en;
int n, m;
int nxt[1000010];
//得到nxt维护数组
void getnxt()
{
int j, k;
j = 0, k = -1, nxt[0] = -1;
while(j < m)
{
//从头比或相等则比较下去并记录最长公共真前、后缀长度
if(k == -1 || en[j] == en[k])
{
//j之前的最长公共真前、后缀长度为k,前缀下标0~k-1,后缀下标j-k~j-1
nxt[++ j] = ++ k;
}
else k = nxt[k];//k回退到Next[k]的位置
}
}
int kmp()
{
getnxt();
int ans = 0;
int j, k;
j = 0, k = 0;
while(j < n)
{
//k==-1相当于从start的开始(重新)比较,相等就比下去
if(k == -1 || en[k] == start[j])
{
++ j;
++ k;
}
else k = nxt[k];
if(k == m)
{
k = nxt[k];//把j位置的字符和k位置的字符对齐,便于下次比较
ans ++;
}
}
return ans;
}

hash算法解释

详情注意一下链接
解释

加一个板子

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
#include<bits/stdc++.h>

using namespace std;
typedef unsigned long long ull;
const ull base = 2333;
const ull mod = 1e9+9;
const int N = 1e6+100;
ull hashes[N],p[N];
// Returns the polynomial hash (mod `mod`) of the 1-based substring [l, r],
// derived from the prefix hashes in `hashes` and the base powers in `p`.
ull gethashes(int l,int r){
    const ull whole  = hashes[r] % mod;
    // Prefix hash before l, shifted by base^(r-l+1) to line up with hashes[r].
    const ull prefix = (hashes[l-1] % mod * p[r-l+1] % mod) % mod;
    // Adding mod before the final reduction keeps the difference non-negative.
    return (whole - prefix + mod) % mod;
}
char s[N],t[N];
int main(){
int T;
scanf("%d",&T);
while(T--){
scanf("%s %s",t+1,s+1);
int n=strlen(s+1);
int m=strlen(t+1);
p[0]=1;
for(int i=1;i<=n;i++){
hashes[i]=(hashes[i-1]*base%mod+s[i]%mod)%mod;
p[i]=(p[i-1]%mod*base%mod)%mod;
}
ull ans=0;
for(int i=1;i<=m;i++) ans=(ans*base%mod+t[i]%mod)%mod;
int res=0;
for(int i=m;i<=n;i++){
if(gethashes(i-m+1,i)==ans){
res++;
}
}
printf("%d\n",res);
}
return 0;
}

Number Sequence

Description:
Given two sequences of numbers : a[1], a[2], …… , a[N], and b[1], b[2], …… , b[M] (1 <= M <= 10000, 1 <= N <= 1000000). Your task is to find a number K which make a[K] = b[1], a[K + 1] = b[2], …… , a[K + M - 1] = b[M]. If there are more than one K exist, output the smallest one.
Input
The first line of input is a number T which indicate the number of cases. Each case contains three lines. The first line is two numbers N and M (1 <= M <= 10000, 1 <= N <= 1000000). The second line contains N integers which indicate a[1], a[2], …… , a[N]. The third line contains M integers which indicate b[1], b[2], …… , b[M]. All integers are in the range of [-1000000, 1000000].
Output
For each test case, you should output one line which only contain K described above. If no such K exists, output -1 instead.

1
2
3
4
5
6
7
8
9
10
11
Sample Input
2
13 5
1 2 1 2 3 1 2 3 1 3 2 1 2
1 2 3 1 3
13 5
1 2 1 2 3 1 2 3 1 3 2 1 2
1 2 3 2 1
Sample Output
6
-1

code:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
#include<bits/stdc++.h>
using namespace std;
const int maxx = 1e6 + 10;
int n, m;
int a[maxx], b[maxx];
int nxt[maxx];

// Builds the failure table nxt[0..m] for the integer pattern b[0..m-1].
// nxt[0] is the sentinel -1; nxt[i] is the longest proper border length of
// the first i pattern elements.
void getnxt()
{
    nxt[0] = -1;
    for (int pos = 0, border = -1; pos < m; )
    {
        const bool extend = (border == -1) || (b[border] == b[pos]);
        if (extend)
            nxt[++pos] = ++border;   // border grows (or restarts at 0)
        else
            border = nxt[border];    // try the next shorter border
    }
}


// Finds the first occurrence of b[0..m-1] inside a[0..n-1].
// Returns the 1-based start index of that occurrence, or -1 if there is none.
int kpm()
{
    getnxt();
    int textPos = 0, patPos = 0;
    while (textPos < n)
    {
        if (patPos != -1 && b[patPos] != a[textPos])
        {
            // Mismatch: shift the pattern using the failure table.
            patPos = nxt[patPos];
        }
        else
        {
            ++textPos;
            ++patPos;
        }
        // textPos is one past the last matched element, so the 1-based start
        // of the match is textPos - m + 1.
        if (patPos == m)
            return textPos - m + 1;
    }
    return -1;
}

// Reads the number of test cases; for each one reads n, m, the sequence a
// (n integers) and the sequence b (m integers), then prints the result of kpm().
int main()
{
    ios::sync_with_stdio(false);
    int t;
    cin >> t;
    for (; t != 0; --t)
    {
        cin >> n >> m;
        for (int idx = 0; idx < n; ++idx)
            cin >> a[idx];
        for (int idx = 0; idx < m; ++idx)
            cin >> b[idx];
        cout << kpm() << '\n';
    }
    return 0;
}

Oulipo

Description:
The French author Georges Perec (1936–1982) once wrote a book, La disparition, without the letter ‘e’. He was a member of the Oulipo group. A quote from the book:

Tout avait Pair normal, mais tout s’affirmait faux. Tout avait Fair normal, d’abord, puis surgissait l’inhumain, l’affolant. Il aurait voulu savoir où s’articulait l’association qui l’unissait au roman : stir son tapis, assaillant à tout instant son imagination, l’intuition d’un tabou, la vision d’un mal obscur, d’un quoi vacant, d’un non-dit : la vision, l’avision d’un oubli commandant tout, où s’abolissait la raison : tout avait l’air normal mais…

Perec would probably have scored high (or rather, low) in the following contest. People are asked to write a perhaps even meaningful text on some subject with as few occurrences of a given “word” as possible. Our task is to provide the jury with a program that counts these occurrences, in order to obtain a ranking of the competitors. These competitors often write very long texts with nonsense meaning; a sequence of 500,000 consecutive ‘T’s is not unusual. And they never use spaces.

So we want to quickly find out how often a word, i.e., a given string, occurs in a text. More formally: given the alphabet {‘A’, ‘B’, ‘C’, …, ‘Z’} and two finite strings over that alphabet, a word W and a text T, count the number of occurrences of W in T. All the consecutive characters of W must exactly match consecutive characters of T. Occurrences may overlap.
Input
The first line of the input file contains a single number: the number of test cases to follow. Each test case has the following format:

One line with the word W, a string over {‘A’, ‘B’, ‘C’, …, ‘Z’}, with 1 ≤ |W| ≤ 10,000 (here |W| denotes the length of the string W).
One line with the text T, a string over {‘A’, ‘B’, ‘C’, …, ‘Z’}, with |W| ≤ |T| ≤ 1,000,000.
Output
For every test case in the input file, the output should contain a single number, on a single line: the number of occurrences of the word W in the text T.

1
2
3
4
5
6
7
8
9
10
11
12
Sample Input
3
BAPC
BAPC
AZA
AZAZAZA
VERDI
AVERDXIVYERDIAN
Sample Output
1
3
0

code:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
#include<iostream>

using namespace std;
string start,en;
int n, m;
int nxt[1000010];
//得到nxt维护数组
void getnxt()
{
int j, k;
j = 0, k = -1, nxt[0] = -1;
while(j < m)
{
//从头比或相等则比较下去并记录最长公共真前、后缀长度
if(k == -1 || en[j] == en[k])
{
//j之前的最长公共真前、后缀长度为k,前缀下标0~k-1,后缀下标j-k~j-1
nxt[++ j] = ++ k;
}
else k = nxt[k];//k回退到Next[k]的位置
}
}
int kmp()
{
getnxt();
int ans = 0;
int j, k;
j = 0, k = 0;
while(j < n)
{
//k==-1相当于从start的开始(重新)比较,相等就比下去
if(k == -1 || en[k] == start[j])
{
++ j;
++ k;
}
else k = nxt[k];
if(k == m)
{
k = nxt[k];//把j位置的字符和k位置的字符对齐,便于下次比较
ans ++;
}
}
return ans;
}


// Reads the number of test cases; each case supplies the word (stored in `en`)
// followed by the text (stored in `start`), and prints the count from kmp().
int main()
{
    ios::sync_with_stdio(false);
    int t;
    cin >> t;
    while (t-- != 0)
    {
        cin >> en >> start;
        m = en.size();
        n = start.size();
        cout << kmp() << '\n';
    }

    return 0;
}

剪花布条

Description:
一块花布条,里面有些图案,另有一块直接可用的小饰条,里面也有一些图案。对于给定的花布条和小饰条,计算一下能从花布条中尽可能剪出几块小饰条来呢?
Input
输入中含有一些数据,分别是成对出现的花布条和小饰条,其布条都是用可见ASCII字符表示的,可见的ASCII字符有多少个,布条的花纹也有多少种花样。花纹条和小饰条不会超过1000个字符长。如果遇见#字符,则不再进行工作。
Output
输出能从花纹布中剪出的最多小饰条个数,如果一块都没有,那就老老实实输出0,每个结果之间应换行。

1
2
3
4
5
6
7
Sample Input
abcde a3
aaaaaa aa
#
Sample Output
0
3

code:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
#include<iostream>

using namespace std;
string start,en;
int n, m;
int nxt[10010];
//得到nxt维护数组
void getnxt()
{
int j, k;
j = 0, k = -1, nxt[0] = -1;
while(j < m)
{
//从头比或相等则比较下去并记录最长公共真前、后缀长度
if(k == -1 || en[j] == en[k])
{
//j之前的最长公共真前、后缀长度为k,前缀下标0~k-1,后缀下标j-k~j-1
nxt[++ j] = ++ k;
}
else k = nxt[k];//k回退到Next[k]的位置
}
}
int kmp()
{
getnxt();
int ans = 0;
int j, k;
j = 0, k = 0;
while(j < n)
{
//k==-1相当于从start的开始(重新)比较,相等就比下去
if(k == -1 || en[k] == start[j])
{
++ j;
++ k;
}
else k = nxt[k];
if(k == m)
{
k = 0;
ans ++;
}
}
return ans;
}


// Reads (text, pattern) pairs until the token "#" or end of input, printing
// the result of kmp() for each pair on its own line.
int main()
{
    ios::sync_with_stdio(false);
    while (cin >> start && start != "#")
    {
        cin >> en;
        m = en.size();
        n = start.size();
        cout << kmp() << '\n';
    }

    return 0;
}

Cyclic Nacklace

Description:
CC always becomes very depressed at the end of this month, he has checked his credit card yesterday, without any surprise, there are only 99.9 yuan left. he is too distressed and thinking about how to tide over the last days. Being inspired by the entrepreneurial spirit of “HDU CakeMan”, he wants to sell some little things to make money. Of course, this is not an easy task.

As Christmas is around the corner, Boys are busy in choosing christmas presents to send to their girlfriends. It is believed that chain bracelet is a good choice. However, Things are not always so simple, as is known to everyone, girl’s fond of the colorful decoration to make bracelet appears vivid and lively, meanwhile they want to display their mature side as college students. after CC understands the girls demands, he intends to sell the chain bracelet called CharmBracelet. The CharmBracelet is made up with colorful pearls to show girls’ lively, and the most important thing is that it must be connected by a cyclic chain which means the color of pearls are cyclic connected from the left to right. And the cyclic count must be more than one. If you connect the leftmost pearl and the rightmost pearl of such chain, you can make a CharmBracelet. Just like the pictrue below, this CharmBracelet’s cycle is 9 and its cyclic count is 2:

Now CC has brought in some ordinary bracelet chains, he wants to buy minimum number of pearls to make CharmBracelets so that he can save more money. but when remaking the bracelet, he can only add color pearls to the left end and right end of the chain, that is to say, adding to the middle is forbidden.
CC is satisfied with his ideas and ask you for help.
Input
The first line of the input is a single integer T ( 0 < T <= 100 ) which means the number of test cases.
Each test case contains only one line describe the original ordinary chain to be remade. Each character in the string stands for one pearl and there are 26 kinds of pearls being described by ‘a’ ~’z’ characters. The length of the string Len: ( 3 <= Len <= 100000 ).
Output
For each case, you are required to output the minimum count of pearls added to make a CharmBracelet.

1
2
3
4
5
6
7
8
9
Sample Input
3
aaa
abca
abcde
Sample Output
0
2
5

code:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
#include<bits/stdc++.h>
using namespace std;
typedef long long ll;
int n;
string str;
int nxt[1000010];
// Builds the failure table nxt[0..n] of `str` (length n): nxt[0] = -1 and
// nxt[i] is the longest proper border length of the first i characters.
void kmp()
{
    nxt[0] = -1;
    int border = -1;
    int pos = 0;
    while (pos < n)
    {
        // Drop to shorter borders until one can be extended by str[pos].
        while (border != -1 && str[border] != str[pos])
            border = nxt[border];
        ++pos;
        ++border;
        nxt[pos] = border;
    }
}
// For each test string prints the minimum number of characters that must be
// added so the string consists of at least two full repetitions of its
// smallest period.
int main()
{
    ios::sync_with_stdio(false);
    ll t;
    cin >> t;
    while (t-- != 0)
    {
        cin >> str;
        n = str.size();
        kmp();
        // n - nxt[n] is the length of the smallest period of str.
        const int period = n - nxt[n];
        const int leftover = n % period;
        if (nxt[n] && leftover == 0)
        {
            // Already two or more complete periods: nothing to add.
            printf("0\n");
        }
        else
        {
            // Pad the trailing partial period up to a full one.
            printf("%d\n", period - leftover);
        }
    }

    return 0;
}

Period

Description:
For each prefix of a given string S with N characters (each character has an ASCII code between 97 and 126, inclusive), we want to know whether the prefix is a periodic string. That is, for each i (2 <= i <= N) we want to know the largest K > 1 (if there is one) such that the prefix of S with length i can be written as A^K, that is A concatenated K times, for some string A. Of course, we also want to know the period K.
Input
The input consists of several test cases. Each test case consists of two lines. The first one contains N (2 <= N <= 1 000 000) – the size of the string S.The second line contains the string S. The input file ends with a line, having the
number zero on it.
Output
For each test case, output “Test case #” and the consecutive test case number on a single line; then, for each prefix with length i that has a period K > 1, output the prefix size i and the period K separated by a single space; the prefix sizes must be in increasing order. Print a blank line after each test case.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
Sample Input
3
aaa
12
aabaabaabaab
0
Sample Output
Test case #1
2 2
3 3

Test case #2
2 2
6 2
9 3
12 4

code:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
#include<iostream>
#include<cstring>
#include<cstdio>
#include<string>
#include<algorithm>
using namespace std;
#define N 1000010

char s[N];
int nextval[N];
int len;

// Fills nextval[0..len] for the first `len` characters of `s`.
// nextval[0] is -1; nextval[i] is the longest proper border length of s[0..i-1].
void getnext(const char *s)
{
    nextval[0] = -1;
    for (int pos = 0, border = -1; pos != len; )
    {
        if (border != -1 && s[pos] != s[border])
        {
            // Mismatch: retry with the next shorter border.
            border = nextval[border];
            continue;
        }
        nextval[++pos] = ++border;
    }
}

int main()
{
int T = 1;
int length, add;
while(scanf("%d", &len) && len)
{
scanf("%s", s);
getnext(s);
printf("Test case #%d\n", T++);
for(int i = 1; i <= len; ++i)
{
length = i - nextval[i]; //循环节的长度
if(i != length && i % length == 0) //如果有多个循环
printf("%d %d\n", i, i / length);
}
printf("\n");
}
return 0;
}

Power Strings

Description:
Given two strings a and b we define ab to be their concatenation. For example, if a = “abc” and b = “def” then ab = “abcdef”. If we think of concatenation as multiplication, exponentiation by a non-negative integer is defined in the normal way: a^0 = “” (the empty string) and a^(n+1) = a*(a^n).
Input
Each test case is a line of input representing s, a string of printable characters. The length of s will be at least 1 and will not exceed 1 million characters. A line containing a period follows the last test case.
Output
For each s you should print the largest n such that s = a^n for some string a.

1
2
3
4
5
6
7
8
9
10
11
Sample Input
abcd
aaaa
ababab
.
Sample Output
1
4
3
Hint
This problem has huge input, use scanf instead of cin to avoid time limit exceed.

code:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
#include<iostream>
#include<stdio.h>
using namespace std;
int n;
int nxt[1000010];
string str;
// Computes nxt[0..n] for `str` (length n): nxt[0] = -1, and nxt[i] is the
// length of the longest proper border of the first i characters.
void getnxt()
{
    nxt[0] = -1;
    int pos = 0, border = -1;
    while (pos < n)
    {
        const bool canExtend = (border == -1) || (str[pos] == str[border]);
        if (!canExtend)
        {
            border = nxt[border];
            continue;
        }
        ++pos;
        ++border;
        nxt[pos] = border;
    }
}

// Reads strings until "." (or end of input) and, for each, prints the largest
// k such that the string is some substring repeated k times.
int main()
{
    ios::sync_with_stdio(0);
    cin.tie(0);cout.tie(0);
    while (cin >> str && str != ".")
    {
        n = str.size();
        getnxt();
        // Candidate smallest period; it is a true period only if it divides n.
        const int len = n - nxt[n];
        const int repeats = (n % len == 0) ? n / len : 1;
        printf("%d\n", repeats);
    }
    return 0;
}

Seek the Name, Seek the Fame

Description:
The little cat is so famous, that many couples tramp over hill and dale to Byteland, and asked the little cat to give names to their newly-born babies. They seek the name, and at the same time seek the fame. In order to escape from such boring job, the innovative little cat works out an easy but fantastic algorithm:

Step1. Connect the father’s name and the mother’s name, to a new string S.
Step2. Find a proper prefix-suffix string of S (which is not only the prefix, but also the suffix of S).

Example: Father=’ala’, Mother=’la’, we have S = ‘ala’+’la’ = ‘alala’. Potential prefix-suffix strings of S are {‘a’, ‘ala’, ‘alala’}. Given the string S, could you help the little cat to write a program to calculate the length of possible prefix-suffix strings of S? (He might thank you by giving your baby a name:)
Input
The input contains a number of test cases. Each test case occupies a single line that contains the string S described above.

Restrictions: Only lowercase letters may appear in the input. 1 <= Length of S <= 400000.
Output
For each test case, output a single line with integer numbers in increasing order, denoting the possible length of the new baby’s name.

1
2
3
4
5
6
Sample Input
ababcababababcabab
aaaaa
Sample Output
2 4 9 18
1 2 3 4 5

code:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
#include<stdio.h>
#include<string.h>
using namespace std;
const int N = 400005;
char p[N];
int next[N],arr[N];
// Builds the failure table of the first `m` characters of `p`.
//   p    - the string to analyse
//   m    - number of characters of p to process
//   next - output array with room for m + 1 ints; next[0] is set to -1 and
//          next[i] to the longest proper border length of p[0..i-1]
void getNext(char p[],int m,int next[]){
    next[0] = -1;
    int border = -1;
    for (int pos = 0; pos < m; ) {
        if (border >= 0 && p[pos] != p[border]) {
            // Mismatch: fall back to the next shorter border.
            border = next[border];
        } else {
            // Restart from the sentinel or extend the border by one.
            ++pos;
            ++border;
            next[pos] = border;
        }
    }
}
// For each input string prints, in increasing order, every length that is both
// a prefix and a suffix of the string (the full length included).
int main(){
    while (scanf("%s", p) != EOF) {
        const int m = strlen(p);
        getNext(p, m, next);
        // Follow the border chain from the full length down to the sentinel;
        // the lengths are collected in decreasing order.
        int cnt = 0;
        int cur = m;
        while (next[cur] != -1) {
            arr[cnt++] = cur;
            cur = next[cur];
        }
        // Emit them smallest first; the largest (arr[0]) ends the line.
        int idx = cnt;
        while (--idx > 0)
            printf("%d ", arr[idx]);
        printf("%d\n", arr[0]);
    }
    return 0;
}

Compress Words

Description:
Amugae has a sentence consisting of n words. He want to compress this sentence into one word. Amugae doesn’t like repetitions, so when he merges two words into one word, he removes the longest prefix of the second word that coincides with a suffix of the first word. For example, he merges “sample” and “please” into “samplease”.

Amugae will merge his sentence left to right (i.e. first merge the first two words, then merge the result with the third word and so on). Write a program that prints the compressed word after the merging process ends.

Input
The first line contains an integer n (1 ≤ n ≤ 10^5), the number of the words in Amugae’s sentence.

The second line contains n words separated by single space. Each words is non-empty and consists of uppercase and lowercase English letters and digits (‘A’, ‘B’, …, ‘Z’, ‘a’, ‘b’, …, ‘z’, ‘0’, ‘1’, …, ‘9’). The total length of the words does not exceed 10^6.

Output
In the only line output the compressed word after the merging process ends as described in the problem.

题解

KMP模板题,用第二个串与第一个串中等长后缀进行匹配即可,比如第一个串长度为n,第二个串长度为m,n>m,则从第一个串的n-m处开始与第二个串匹配,最大匹配长度即为可以合并的长度。

1
2
3
4
5
6
7
8
9
10
11
Examples
Input
5
I want to order pizza
Output
Iwantorderpizza
Input
5
sample please ease in out
Output
sampleaseinout

code:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
#include <cstdio>
#include <cstring>
#include <algorithm>
#include <iostream>
#include <cmath>
#include <vector>
#include <queue>
#include <string.h>
using namespace std;

const int maxn = 1e6 + 10;
int nxt[maxn];
char str[maxn];
char ans[maxn];
int lenn,lenm;
// Builds the failure table nxt[0..lenm] of the current word `str`:
// nxt[0] = -1, nxt[i] = longest proper border length of str[0..i-1].
void get_next(){
    nxt[0] = -1;
    int border = -1;
    int pos = 0;
    while (pos < lenm) {
        if (border != -1 && str[pos] != str[border]) {
            border = nxt[border];   // try the next shorter border
        } else {
            ++pos;
            ++border;
            nxt[pos] = border;
        }
    }
}
//返回最长匹配长度
// Matches the word `str` (length lenm) against ans[pos..lenn-1] and returns
// the number of pattern characters matched when the scan stops.
// The scan stops early as soon as the whole word has been matched, so the
// result is the overlap at the end of `ans` only when the scanned window is
// no longer than the word itself.
int KMP(int pos) {
    get_next();
    int matched = 0;
    for (int i = pos; i < lenn; ++i) {
        const char c = ans[i];
        // Fall back through the borders until c can extend the match.
        while (matched != 0 && c != str[matched])
            matched = nxt[matched];
        if (c == str[matched])
            ++matched;
        if (matched == lenm)
            break;
    }
    return matched;
}
// Reads N words and merges them left to right: before appending a word, the
// longest prefix of it that equals a suffix of the result so far is dropped.
// Prints the merged word followed by a newline.
signed main() {
    int N;
    if (scanf("%d", &N) != 1 || scanf("%s", ans) != 1)
        return 0;   // no usable input
    lenn = strlen(ans);
    for (int i = 2; i <= N; i++) {
        if (scanf("%s", str) != 1)
            break;  // fewer words than announced: print what we have
        lenm = strlen(str);
        // Only the last lenm characters of the result can overlap the new
        // word, so the scan must start at lenn - lenm (clamped to 0). The
        // start is derived from the current lenn on every merge, the first
        // one included; starting the first merge at 0 would let KMP stop on
        // a full match in the middle of the first word ("abc" + "ab" would
        // wrongly give "abc").
        int now = KMP(max(lenn - lenm, 0));
        for (int j = now; j < lenm; j++) {
            ans[lenn++] = str[j];
        }
    }
    // Terminate explicitly so the whole result can be written in one call.
    ans[lenn] = '\0';
    puts(ans);
    return 0;
}

---------------- The End ----------------
0%