這題就是給你一個固定的語法(不是題目輸入的),之後給你n個字串,你要判斷這個字串用給的語法去分析是否ambigous,如果不是就按題目要求輸出他的語法結構。
這裡我把給的語法轉化成形式文法,如下:
$
S \Longrightarrow list \; E \\
S \Longrightarrow list \; L \\
L \Longrightarrow E \; L \\
L \Longrightarrow E \; AND \; E \\
E \Longrightarrow T \\
E \Longrightarrow S
$
這裡的$list$就是原本的"a list of"、$AND$就是"and"、$T$就是所有的合法英文單字。之後利用CYK算法跟earley算法去分析並找出ambiguous。
首先把形式文法轉乘CNF表達式:
$
S \Longrightarrow W_1 \; S \\
S \Longrightarrow W_1 \; T \\
S \Longrightarrow W_1 \; L \\
L \Longrightarrow S \; L \\
L \Longrightarrow T \; L \\
L \Longrightarrow S \; L^{'} \\
L \Longrightarrow T \; L^{'} \\
L^{'} \Longrightarrow W_2 \; S \\
L^{'} \Longrightarrow W_2 \; T
$
這裡的$W_1$就是原本的"a list of"、$W_2$就是"and"、$T$就是所有的合法英文單字。
接著就可以跑CYK演算法了:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 | #include<bits/stdc++.h> using namespace std; enum P{//symbol S,L,LP,T,W1,W2,NUM };// T="A"|"B"|"C"..., W1="a list of", w2="and" struct CNF{ P s,a,b;// s->ab CNF(P s,P a,P b):s(s),a(a),b(b){} }; vector<CNF> cnf; inline void cnf_init(){ cnf.push_back(CNF(S,W1,S)); cnf.push_back(CNF(S,W1,T)); cnf.push_back(CNF(S,W1,L)); cnf.push_back(CNF(L,S,L)); cnf.push_back(CNF(L,T,L)); cnf.push_back(CNF(L,S,LP)); cnf.push_back(CNF(L,T,LP)); cnf.push_back(CNF(LP,W2,S)); cnf.push_back(CNF(LP,W2,T)); } vector<pair<string,P> > tok; int dp[400][400][P::NUM]; bool amb[400][400][P::NUM];//ambiguous inline void CYK(){ for(size_t i=0;i<tok.size();++i){ memset(dp[i][i],0,sizeof(dp[i][i])); memset(amb[i][i],0,sizeof(amb[i][i])); dp[i][i][tok[i].second]=i+1; } for(int r=0;r<(int)tok.size();++r){ for(int l=r-1;l>=0;--l){ memset(dp[l][r],0,sizeof(dp[l][r])); memset(amb[l][r],0,sizeof(amb[l][r])); for(int k=l;k<r;++k){ for(size_t i=0;i<cnf.size();++i){ if(dp[l][k][cnf[i].a]&&dp[k+1][r][cnf[i].b]){ if(dp[l][r][cnf[i].s]){ amb[l][r][cnf[i].s]=true; }else{ dp[l][r][cnf[i].s]=k+1; amb[l][r][cnf[i].s]=amb[l][k][cnf[i].a]||amb[k+1][r][cnf[i].b]; } } } } } } } vector<string> ans; void dfs(int l,int r,P type){ if(l==r){ if(type==P::T)ans.push_back(tok[l].first); return; } bool is=type==P::S; if(is)ans.push_back("("); int k=dp[l][r][type]-1; for(size_t i=0;i<cnf.size();++i){ if(dp[l][k][cnf[i].a]&&dp[k+1][r][cnf[i].b]){ dfs(l,k,cnf[i].a); dfs(k+1,r,cnf[i].b); break; } } if(is)ans.push_back(")"); } int n; string s; int main(){ cin.tie(0); ios::sync_with_stdio(0); cnf_init(); cin>>n; cin.get(); while(n--){ getline(cin,s); replace(s.begin(),s.end(),',',' '); stringstream ss(s); tok.clear(); while(ss>>s){ if(s=="a"||s=="of")continue; if(s=="list"){ tok.push_back(make_pair("(",P::W1)); }else if(s=="and"){ tok.push_back(make_pair(")",P::W2)); }else tok.push_back(make_pair(s,P::T)); } CYK(); if(amb[0][tok.size()-1][P::S]){ cout<<"AMBIGUOUS\n"; }else{ ans.clear(); dfs(0,tok.size()-1,P::S); string pre; for(size_t i=0;i<ans.size();++i){ if(pre!="("&&pre!=""&&ans[i]!=")")cout<<' '; cout<<(pre=ans[i]); } cout<<'\n'; } } return 0; } |
第二種方式是用earlye直接做,因為earlye可以吃除了空字串之外的任何一種規則,所以可以直接建構語法自動機,我這裡是先建構完自動機後,再從他去建構語法樹:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 | #include<bits/stdc++.h> using namespace std; struct Rule{ vector<vector<Rule*> > p; void add(const vector<Rule*> &l){ p.push_back(l); } }; map<string,Rule*> NameRule; map<Rule*,string> RuleName; inline void init_Rule(){ for(auto r:RuleName)delete r.first; RuleName.clear(); NameRule.clear(); } inline Rule *add_rule(const string &s){ if(NameRule.find(s)!=NameRule.end())return NameRule[s]; Rule *r=new Rule(); RuleName[r]=s; NameRule[s]=r; return r; } typedef vector<Rule*> production; struct State{ Rule *r; int rid,dot_id,start,end; State(Rule *r,int rid,int dot,int start):r(r),rid(rid),dot_id(dot),start(start),end(-1){} State(Rule *r=0,int col=0):r(r),rid(-1),dot_id(-1),start(-1),end(col){} bool completed()const{ return rid==-1||dot_id>=(int)r->p[rid].size(); } Rule *next_term()const{ if(completed())return 0; return r->p[rid][dot_id]; } bool operator<(const State& b)const{ if(start!=b.start)return start<b.start; if(dot_id!=b.dot_id)return dot_id<b.dot_id; if(r!=b.r)return r<b.r; return rid<b.rid; } void print()const{ cout<<RuleName[r]<<"->"; if(rid!=-1)for(size_t i=0;;++i){ if((int)i==dot_id)cout<<" "<<"$"; if(i>=r->p[rid].size())break; cout<<" "<<RuleName[r->p[rid][i]]; } cout<<" "<<"["<<start<<","<<end<<"]"<<endl; } }; struct Column{ Rule *term; string value; vector<State> s; map<State,set<pair<State,State> > > div; //div比較像一棵 左兄右子的樹 Column(Rule *r,const string &s):term(r),value(s){} Column(){} bool add(const State &st,int col){ if(div.find(st)==div.end()){ div[st]; s.push_back(st); s.back().end=col; return true; }else return false; } }; inline vector<Column> lexer(string text){ //tokenize,要自己寫 //他會把 input stream 變成 token stream,就是(terminal,value)pair vector<Column> token; replace(text.begin(),text.end(),',',' '); stringstream ss(text); while(ss>>text){ if(text=="a"||text=="of")continue; if(text=="list"){ token.push_back(Column(NameRule["("],"(")); }else if(text=="and"){ token.push_back(Column(NameRule[")"],")")); }else token.push_back(Column(NameRule["T"],text)); } return token; } vector<Column> table; inline void predict(int col,Rule *rul){ for(size_t i=0;i<rul->p.size();++i){ table[col].add(State(rul,i,0,col),col); } } inline void scan(int col,const State &s,Rule *r){ if(r!=table[col].term)return; State ns(s.r,s.rid,s.dot_id+1,s.start); table[col].add(ns,col); table[col].div[ns].insert(make_pair(s,State(r,col))); } inline void complete(int col,const State &s){ for(size_t i=0;i<table[s.start].s.size();++i){ State &st=table[s.start].s[i]; Rule *term=st.next_term(); if(!term||term->p.size()==0)continue; if(term==s.r){ State nst(st.r,st.rid,st.dot_id+1,st.start); table[col].add(nst,col); table[col].div[nst].insert(make_pair(st,s)); } } } inline pair<bool,State> parse(Rule *GAMMA,const vector<Column > &token){ table.resize(token.size()+1); for(size_t i=0;i<token.size();++i){ table[i+1]=Column(token[i]); } table[0]=Column(); table[0].add(State(GAMMA,0,0,0),0); for(size_t i=0;i<table.size();++i){ for(size_t j=0;j<table[i].s.size();++j){ State state=table[i].s[j]; if(state.completed()){ complete(i,state); }else{ Rule *term=state.next_term(); if(term->p.size()){ predict(i,term); }else if(i+1<table.size()){ scan(i+1,state,term); } } } } for(size_t i=0;i<table.back().s.size();++i){ if(table.back().s[i].r==GAMMA&&table.back().s[i].completed()){ return make_pair(true,table.back().s[i]); } } return make_pair(false,State(0,-1)); } struct node{//語法樹的節點 State s; vector<vector<node*> > child;//vector<node*>.size()>1表示ambiguous node(const State &s):s(s){} node(){} }; struct State_end_cmp{ bool operator()(const State &a,const State &b)const{ return a.end<b.end||(a.end==b.end&&a<b); } }; map<State,node*,State_end_cmp> cache; vector<node*> node_set; inline void init_cache(){ for(auto d:node_set)delete d; cache.clear(); node_set.clear(); } void _build_tree(const State &s,node *pa,bool amb=0){ if(cache.find(s)!=cache.end()){ pa->child.push_back(vector<node*>(1,cache[s])); return; } node *o; if(s.completed()){ o=new node(s); if(amb)pa->child.back().push_back(o); else pa->child.push_back(vector<node*>(1,o)); }else o=pa->child.back().back(); amb=0; for(auto div:table[s.end].div[s]){ if(!amb)_build_tree(div.first,pa); _build_tree(div.second,o,amb); amb=1; } if(s.completed())cache[s]=o; } inline node *build_tree(const State &s){ init_cache(); node o; _build_tree(s,&o); assert(o.child.size()==1); assert(o.child.back().size()==1); return o.child.back().back(); } void print_tree(node *o,int dep=0){ cout<<string(dep,' '),o->s.print(); for(auto div:o->child){ for(auto nd:div){ print_tree(nd,dep+2); } } } //開始寫code inline Rule *get_my_Rule(){ Rule *S=add_rule("S"),*E=add_rule("E"),*L=add_rule("L"); Rule *list=add_rule("("),*AND=add_rule(")"),*T=add_rule("T"); S->add({list,E}); S->add({list,L}); L->add({E,L}); L->add({E,AND,E}); E->add({T}); E->add({S}); Rule *GAMMA=add_rule("GAMMA");//一定要有gamma rule當作是最上層的語法 GAMMA->add({S}); return GAMMA; } vector<string> ans; bool dfs(node *o){ if(o->child.empty()){ if(o->s.r==NameRule["T"])ans.push_back(table[o->s.end].value); return 1; } bool is=o->s.r==NameRule["S"]; if(is)ans.push_back("("); for(auto div:o->child){ if(div.size()>1)return 0; if(!dfs(div[0]))return 0; } if(is)ans.push_back(")"); return 1; } int n; string s; vector<string> tok; int main(){ init_Rule(); Rule *GAMMA=get_my_Rule(); cin.tie(0); ios::sync_with_stdio(0); cin>>n; cin.get(); while(n--){ getline(cin,s); State END=parse(GAMMA,lexer(s)).second; init_cache(); node *root=build_tree(END); ans.clear(); if(!dfs(root)){ cout<<"AMBIGUOUS\n"; }else{ string pre; for(size_t i=0;i<ans.size();++i){ if(pre!="("&&pre!=""&&ans[i]!=")")cout<<' '; cout<<(pre=ans[i]); } cout<<'\n'; } } return 0; } |
沒有留言:
張貼留言