![]()

Codeforces 86C Genetic engineering
基因工程:给定m个子串,求构造长n的母串的方案数。母串中每个字符都至少来自一个子串。
4.7华丽地处理字符串
动态规划算法
用AC自动机(后缀树)维护子串,处理出每个节点的最长后缀模式串长度max_match。定义动态规划数组:
dp[i][j][k] := 构造长i的母串,trie树节点为j,后缀从后往前数最远第k个字符不满足要求时的方案数
于是搜索状态一共有ijk这三个元素,假设当前节点为cur,接受字符c转移到下一个节点nxt。
递推方程为
nxt = next_node(cur, c); if (nxt->max_match > k) // 匹配到后缀且长度大于等于k+1,于是可以把k+1替换为0 dp[length + 1][nxt][0] += dp[length][cur][k]; else // 否则不行,未匹配的尾部增长为k+1 dp[length + 1][nxt][k + 1] += dp[length][cur][k];
这里nxt即使匹配到某个串,但假如其长度x不足以消除后k+1这个字符,那么也不能委曲求全消除这x个字符。因为dp数组的定义是“最远第k+1个字符”,而不是“这k+1个字符都不满足要求”,这里面存在微妙的不同。
#include <iostream>
#include <string>
#include <queue>
#include <algorithm>
#include <map>
using namespace std;
// Aho Corasick
struct Node
{
map<char, Node *> next;
Node *fail;
vector<int> match;
static int _node_size;
int id;
int max_match; // 最长后缀匹配
Node() : fail(NULL)
{
id = _node_size++;
max_match = 0;
}
};
int Node::_node_size = 0;
Node *build(vector<string> pattens)
{
// 1. 构建trie树
Node *root = new Node();
root->fail = root;
for (int i = 0; i < pattens.size(); i++)
{
Node *p = root;
for (auto c : pattens[i])
{
if (p->next[c] == 0)
p->next[c] = new Node();
p = p->next[c];
}
p->match.push_back(i);
p->max_match = pattens[i].size();
}
// 2. failure表
queue<Node *> que;
for (int i = 0; i < 128; i++)
{
if (!root->next[i])
{
root->next[i] = root;
}
else
{
root->next[i]->fail = root;
que.push(root->next[i]);
}
}
while (!que.empty())
{
Node *p = que.front();
que.pop();
for (auto a : p->next)
{
int i = a.first;
Node *np = a.second;
if (!np)
continue;
// add que
que.push(np);
// search failure link
Node *f = p->fail;
while (!f->next[i])
f = f->fail;
np->fail = f->next[i];
np->max_match = max(np->max_match, np->fail->max_match);
// update matching list
np->match.insert(np->match.end(), np->fail->match.begin(), np->fail->match.end());
}
}
return root;
}
// Trie树节点 p 接受字符 c 转移
Node *next_node(Node *p, char c)
{
while (!p->next[c])
p = p->fail;
return p->next[c];
}
Node *next_node(Node *p, string query)
{
for (char c : query)
{
p = next_node(p, c);
}
return p;
}
struct State
{
int length, k;
Node *node;
State(int length, Node *node, int k) : length(length), node(node), k(k)
{}
};
static const int MAX_N = 1000 + 1;
static const int MAX_M = 10 + 1;
int dp[MAX_N][MAX_N][MAX_M]; // dp[i][j][k] := 构造长i的母串,trie树节点为j,后缀有k个字符不满足要求
bool visited[MAX_N][MAX_N][MAX_M];
static const int MOD = 1000000009;
int main()
{
int ids[256];
char cs[4];
const string acgt = "ACGT";
for (int i = 0; i < acgt.length(); ++i)
{
ids[acgt[i]] = i;
cs[i] = acgt[i];
}
int n, m;
scanf("%d%d", &n, &m);
vector<string> dna(m);
for (int i = 0; i < m; ++i)
{
cin >> dna[i];
}
auto root = build(dna);
dp[0][0][0] = 1;
visited[0][0][0] = 1;
queue<State> que;
que.push(State(0, root, 0));
while (!que.empty())
{
State s = que.front();
que.pop();
int length = s.length, k = s.k;
Node *cur = s.node;
if (length == n)
continue;
for (auto c : cs)
{
auto nxt = next_node(cur, c);
if (nxt->max_match > k) // 匹配到后缀且长度大于等于k+1,于是可以把k+1替换为0
{
dp[length + 1][nxt->id][0] += dp[length][cur->id][k];
dp[length + 1][nxt->id][0] %= MOD;
if (!visited[length + 1][nxt->id][0])
{
visited[length + 1][nxt->id][0] = true;
que.push(State(length + 1, nxt, 0));
}
}
else
{ // 否则不行,未匹配的尾部增长为k+1
if (k >= 9)
continue;
dp[length + 1][nxt->id][k + 1] += dp[length][cur->id][k];
dp[length + 1][nxt->id][k + 1] %= MOD;
if (!visited[length + 1][nxt->id][k + 1])
{
visited[length + 1][nxt->id][k + 1] = true;
que.push(State(length + 1, nxt, k + 1));
}
}
}
}
int ans = 0;
for (int i = 0; i < root->_node_size; ++i)
{
ans += dp[n][i][0];
ans %= MOD;
}
printf("%d", ans);
return 0;
}

知识共享署名-非商业性使用-相同方式共享:码农场 » Codeforces 86C Genetic engineering 题解《挑战程序设计竞赛》
码农场