Codeforces 86C Genetic engineering
基因工程:给定m个子串,求构造长n的母串的方案数。母串中每个字符都至少来自一个子串。
4.7华丽地处理字符串
动态规划算法
用AC自动机(后缀树)维护子串,处理出每个节点的最长后缀模式串长度max_match。定义动态规划数组:
dp[i][j][k] := 构造长i的母串,trie树节点为j,后缀从后往前数最远第k个字符不满足要求时的方案数
于是搜索状态一共有ijk这三个元素,假设当前节点为cur,接受字符c转移到下一个节点nxt。
递推方程为
nxt = next_node(cur, c); if (nxt->max_match > k) // 匹配到后缀且长度大于等于k+1,于是可以把k+1替换为0 dp[length + 1][nxt][0] += dp[length][cur][k]; else // 否则不行,未匹配的尾部增长为k+1 dp[length + 1][nxt][k + 1] += dp[length][cur][k];
这里nxt即使匹配到某个串,但假如其长度x不足以消除后k+1这个字符,那么也不能委曲求全消除这x个字符。因为dp数组的定义是“最远第k+1个字符”,而不是“这k+1个字符都不满足要求”,这里面存在微妙的不同。
#include <iostream> #include <string> #include <queue> #include <algorithm> #include <map> using namespace std; // Aho Corasick struct Node { map<char, Node *> next; Node *fail; vector<int> match; static int _node_size; int id; int max_match; // 最长后缀匹配 Node() : fail(NULL) { id = _node_size++; max_match = 0; } }; int Node::_node_size = 0; Node *build(vector<string> pattens) { // 1. 构建trie树 Node *root = new Node(); root->fail = root; for (int i = 0; i < pattens.size(); i++) { Node *p = root; for (auto c : pattens[i]) { if (p->next[c] == 0) p->next[c] = new Node(); p = p->next[c]; } p->match.push_back(i); p->max_match = pattens[i].size(); } // 2. failure表 queue<Node *> que; for (int i = 0; i < 128; i++) { if (!root->next[i]) { root->next[i] = root; } else { root->next[i]->fail = root; que.push(root->next[i]); } } while (!que.empty()) { Node *p = que.front(); que.pop(); for (auto a : p->next) { int i = a.first; Node *np = a.second; if (!np) continue; // add que que.push(np); // search failure link Node *f = p->fail; while (!f->next[i]) f = f->fail; np->fail = f->next[i]; np->max_match = max(np->max_match, np->fail->max_match); // update matching list np->match.insert(np->match.end(), np->fail->match.begin(), np->fail->match.end()); } } return root; } // Trie树节点 p 接受字符 c 转移 Node *next_node(Node *p, char c) { while (!p->next[c]) p = p->fail; return p->next[c]; } Node *next_node(Node *p, string query) { for (char c : query) { p = next_node(p, c); } return p; } struct State { int length, k; Node *node; State(int length, Node *node, int k) : length(length), node(node), k(k) {} }; static const int MAX_N = 1000 + 1; static const int MAX_M = 10 + 1; int dp[MAX_N][MAX_N][MAX_M]; // dp[i][j][k] := 构造长i的母串,trie树节点为j,后缀有k个字符不满足要求 bool visited[MAX_N][MAX_N][MAX_M]; static const int MOD = 1000000009; int main() { int ids[256]; char cs[4]; const string acgt = "ACGT"; for (int i = 0; i < acgt.length(); ++i) { ids[acgt[i]] = i; cs[i] = acgt[i]; } int n, m; scanf("%d%d", &n, &m); vector<string> dna(m); for (int i = 0; i < m; ++i) { cin >> dna[i]; } auto root = build(dna); dp[0][0][0] = 1; visited[0][0][0] = 1; queue<State> que; que.push(State(0, root, 0)); while (!que.empty()) { State s = que.front(); que.pop(); int length = s.length, k = s.k; Node *cur = s.node; if (length == n) continue; for (auto c : cs) { auto nxt = next_node(cur, c); if (nxt->max_match > k) // 匹配到后缀且长度大于等于k+1,于是可以把k+1替换为0 { dp[length + 1][nxt->id][0] += dp[length][cur->id][k]; dp[length + 1][nxt->id][0] %= MOD; if (!visited[length + 1][nxt->id][0]) { visited[length + 1][nxt->id][0] = true; que.push(State(length + 1, nxt, 0)); } } else { // 否则不行,未匹配的尾部增长为k+1 if (k >= 9) continue; dp[length + 1][nxt->id][k + 1] += dp[length][cur->id][k]; dp[length + 1][nxt->id][k + 1] %= MOD; if (!visited[length + 1][nxt->id][k + 1]) { visited[length + 1][nxt->id][k + 1] = true; que.push(State(length + 1, nxt, k + 1)); } } } } int ans = 0; for (int i = 0; i < root->_node_size; ++i) { ans += dp[n][i][0]; ans %= MOD; } printf("%d", ans); return 0; }