latex預處理

\( \newcommand{\ord}[1]{\mathcal{O}\left(#1\right)} \newcommand{\abs}[1]{\lvert #1 \rvert} \newcommand{\floor}[1]{\lfloor #1 \rfloor} \newcommand{\ceil}[1]{\lceil #1 \rceil} \newcommand{\opord}{\operatorname{\mathcal{O}}} \newcommand{\argmax}{\operatorname{arg\,max}} \newcommand{\str}[1]{\texttt{"#1"}} \)

2016年10月27日 星期四

[ 2007-2008 Summer Petrozavodsk Camp, Japanese Contest ] A. Reading Brackets in English

題目在這裡:http://codeforces.com/gym/100963

這題就是給你一個固定的語法(不是題目輸入的),之後給你n個字串,你要判斷這個字串用給的語法去分析是否ambigous,如果不是就按題目要求輸出他的語法結構。

這裡我把給的語法轉化成形式文法,如下:
$
S \Longrightarrow list \; E \\
S \Longrightarrow list \; L \\
L \Longrightarrow E \; L \\
L \Longrightarrow E \; AND \; E \\
E \Longrightarrow T \\
E \Longrightarrow S
$
這裡的$list$就是原本的"a list of"、$AND$就是"and"、$T$就是所有的合法英文單字。之後利用CYK算法跟earley算法去分析並找出ambiguous。

首先把形式文法轉乘CNF表達式:
 $
S \Longrightarrow W_1 \; S \\
S \Longrightarrow W_1 \; T \\
S \Longrightarrow W_1 \; L \\
L \Longrightarrow S \; L \\
L \Longrightarrow T \; L \\
L \Longrightarrow S \; L^{'} \\
L \Longrightarrow T \; L^{'} \\
L^{'} \Longrightarrow W_2 \; S \\
L^{'} \Longrightarrow W_2 \; T
$
這裡的$W_1$就是原本的"a list of"、$W_2$就是"and"、$T$就是所有的合法英文單字。
接著就可以跑CYK演算法了:
  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
#include<bits/stdc++.h>
using namespace std;
enum P{//symbol
    S,L,LP,T,W1,W2,NUM
};// T="A"|"B"|"C"..., W1="a list of", w2="and" 
struct CNF{
    P s,a,b;// s->ab
    CNF(P s,P a,P b):s(s),a(a),b(b){}
};
vector<CNF> cnf;
inline void cnf_init(){
    cnf.push_back(CNF(S,W1,S));
    cnf.push_back(CNF(S,W1,T));
    cnf.push_back(CNF(S,W1,L));
    cnf.push_back(CNF(L,S,L));
    cnf.push_back(CNF(L,T,L));
    cnf.push_back(CNF(L,S,LP));
    cnf.push_back(CNF(L,T,LP));
    cnf.push_back(CNF(LP,W2,S));
    cnf.push_back(CNF(LP,W2,T));
}
vector<pair<string,P> > tok;
int dp[400][400][P::NUM];
bool amb[400][400][P::NUM];//ambiguous 
inline void CYK(){
    for(size_t i=0;i<tok.size();++i){
        memset(dp[i][i],0,sizeof(dp[i][i]));
        memset(amb[i][i],0,sizeof(amb[i][i]));
        dp[i][i][tok[i].second]=i+1;
    }
    for(int r=0;r<(int)tok.size();++r){
        for(int l=r-1;l>=0;--l){
            memset(dp[l][r],0,sizeof(dp[l][r]));
            memset(amb[l][r],0,sizeof(amb[l][r]));
            for(int k=l;k<r;++k){
                for(size_t i=0;i<cnf.size();++i){
                    if(dp[l][k][cnf[i].a]&&dp[k+1][r][cnf[i].b]){
                        if(dp[l][r][cnf[i].s]){
                            amb[l][r][cnf[i].s]=true;
                        }else{
                            dp[l][r][cnf[i].s]=k+1;
                            amb[l][r][cnf[i].s]=amb[l][k][cnf[i].a]||amb[k+1][r][cnf[i].b];
                        }
                    }
                }
            }
        }
    }
}
vector<string> ans;
void dfs(int l,int r,P type){
    if(l==r){
        if(type==P::T)ans.push_back(tok[l].first);
        return;
    }
    bool is=type==P::S;
    if(is)ans.push_back("(");
    int k=dp[l][r][type]-1;
    for(size_t i=0;i<cnf.size();++i){
        if(dp[l][k][cnf[i].a]&&dp[k+1][r][cnf[i].b]){
            dfs(l,k,cnf[i].a);
            dfs(k+1,r,cnf[i].b);
            break;
        }
    }
    if(is)ans.push_back(")");
}
int n;
string s;
int main(){
    cin.tie(0);
        ios::sync_with_stdio(0);
    cnf_init();
    cin>>n;
    cin.get();
    while(n--){
        getline(cin,s);
        replace(s.begin(),s.end(),',',' ');
        stringstream ss(s);
        tok.clear();
        while(ss>>s){
            if(s=="a"||s=="of")continue;
            if(s=="list"){
                tok.push_back(make_pair("(",P::W1));
            }else if(s=="and"){
                tok.push_back(make_pair(")",P::W2));
            }else tok.push_back(make_pair(s,P::T));
        }
        CYK();
        if(amb[0][tok.size()-1][P::S]){
            cout<<"AMBIGUOUS\n";
        }else{
            ans.clear();
            dfs(0,tok.size()-1,P::S);
            string pre;
            for(size_t i=0;i<ans.size();++i){
                if(pre!="("&&pre!=""&&ans[i]!=")")cout<<' ';
                cout<<(pre=ans[i]);
            }
            cout<<'\n';
        }
    }
    return 0;
}

第二種方式是用earlye直接做,因為earlye可以吃除了空字串之外的任何一種規則,所以可以直接建構語法自動機,我這裡是先建構完自動機後,再從他去建構語法樹:
  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
#include<bits/stdc++.h>
using namespace std;
struct Rule{
    vector<vector<Rule*> > p;
    void add(const vector<Rule*> &l){
        p.push_back(l);
    }
};
map<string,Rule*> NameRule;
map<Rule*,string> RuleName;
inline void init_Rule(){
    for(auto r:RuleName)delete r.first;
    RuleName.clear();
    NameRule.clear();
}
inline Rule *add_rule(const string &s){
    if(NameRule.find(s)!=NameRule.end())return NameRule[s];
    Rule *r=new Rule();
    RuleName[r]=s;
    NameRule[s]=r;
    return r;
}
typedef vector<Rule*> production;
struct State{
    Rule *r;
    int rid,dot_id,start,end;
    State(Rule *r,int rid,int dot,int start):r(r),rid(rid),dot_id(dot),start(start),end(-1){}
    State(Rule *r=0,int col=0):r(r),rid(-1),dot_id(-1),start(-1),end(col){}
    bool completed()const{
        return rid==-1||dot_id>=(int)r->p[rid].size();
    }
    Rule *next_term()const{
        if(completed())return 0;
        return r->p[rid][dot_id];
    }
    bool operator<(const State& b)const{
        if(start!=b.start)return start<b.start;
        if(dot_id!=b.dot_id)return dot_id<b.dot_id;
        if(r!=b.r)return r<b.r;
        return rid<b.rid;
    }
    void print()const{
        cout<<RuleName[r]<<"->";
        if(rid!=-1)for(size_t i=0;;++i){
            if((int)i==dot_id)cout<<" "<<"$";
            if(i>=r->p[rid].size())break;
            cout<<" "<<RuleName[r->p[rid][i]];
        }
        cout<<" "<<"["<<start<<","<<end<<"]"<<endl;
    }
};
struct Column{
    Rule *term;
    string value;
    vector<State> s;
    map<State,set<pair<State,State> > > div;
    //div比較像一棵 左兄右子的樹
    Column(Rule *r,const string &s):term(r),value(s){}
    Column(){}
    bool add(const State &st,int col){
        if(div.find(st)==div.end()){
            div[st];
            s.push_back(st);
            s.back().end=col;
            return true;
        }else return false;
    }
};
inline vector<Column> lexer(string text){
    //tokenize,要自己寫
    //他會把 input stream 變成 token stream,就是(terminal,value)pair
    vector<Column> token;
    replace(text.begin(),text.end(),',',' ');
    stringstream ss(text);
    while(ss>>text){
        if(text=="a"||text=="of")continue;
        if(text=="list"){
            token.push_back(Column(NameRule["("],"("));
        }else if(text=="and"){
            token.push_back(Column(NameRule[")"],")"));
        }else token.push_back(Column(NameRule["T"],text));
    }
    return token;
}
vector<Column> table;
inline void predict(int col,Rule *rul){
    for(size_t i=0;i<rul->p.size();++i){
        table[col].add(State(rul,i,0,col),col);
    }
}
inline void scan(int col,const State &s,Rule *r){
    if(r!=table[col].term)return;
    State ns(s.r,s.rid,s.dot_id+1,s.start);
    table[col].add(ns,col);
    table[col].div[ns].insert(make_pair(s,State(r,col)));
}
inline void complete(int col,const State &s){
    for(size_t i=0;i<table[s.start].s.size();++i){
        State &st=table[s.start].s[i];
        Rule *term=st.next_term();
        if(!term||term->p.size()==0)continue;
        if(term==s.r){
            State nst(st.r,st.rid,st.dot_id+1,st.start);
            table[col].add(nst,col);
            table[col].div[nst].insert(make_pair(st,s));
        }
    }
}
inline pair<bool,State> parse(Rule *GAMMA,const vector<Column > &token){
    table.resize(token.size()+1);
    for(size_t i=0;i<token.size();++i){
        table[i+1]=Column(token[i]);
    }
    table[0]=Column();
    table[0].add(State(GAMMA,0,0,0),0);
    for(size_t i=0;i<table.size();++i){
        for(size_t j=0;j<table[i].s.size();++j){
            State state=table[i].s[j];
            if(state.completed()){
                complete(i,state);
            }else{
                Rule *term=state.next_term();
                if(term->p.size()){
                    predict(i,term);
                }else if(i+1<table.size()){
                    scan(i+1,state,term);
                }
            }
        }
    }
    for(size_t i=0;i<table.back().s.size();++i){
        if(table.back().s[i].r==GAMMA&&table.back().s[i].completed()){
            return make_pair(true,table.back().s[i]);
        }
    }
    return make_pair(false,State(0,-1));
}
struct node{//語法樹的節點
    State s;
    vector<vector<node*> > child;//vector<node*>.size()>1表示ambiguous
    node(const State &s):s(s){}
    node(){}
};
struct State_end_cmp{
    bool operator()(const State &a,const State &b)const{
        return a.end<b.end||(a.end==b.end&&a<b);
    }
};
map<State,node*,State_end_cmp> cache;
vector<node*> node_set;
inline void init_cache(){
    for(auto d:node_set)delete d;
    cache.clear();
    node_set.clear();
}
void _build_tree(const State &s,node *pa,bool amb=0){
    if(cache.find(s)!=cache.end()){
        pa->child.push_back(vector<node*>(1,cache[s]));
        return;
    }
    node *o;
    if(s.completed()){
        o=new node(s);
        if(amb)pa->child.back().push_back(o);
        else pa->child.push_back(vector<node*>(1,o));
    }else o=pa->child.back().back();
    amb=0;
    for(auto div:table[s.end].div[s]){
        if(!amb)_build_tree(div.first,pa);
        _build_tree(div.second,o,amb);
        amb=1;
    }
    if(s.completed())cache[s]=o;
}
inline node *build_tree(const State &s){
    init_cache();
    node o;
    _build_tree(s,&o);
    assert(o.child.size()==1);
    assert(o.child.back().size()==1);
    return o.child.back().back();
}
void print_tree(node *o,int dep=0){
    cout<<string(dep,' '),o->s.print();
    for(auto div:o->child){
        for(auto nd:div){
            print_tree(nd,dep+2);
        }
    }
}
//開始寫code 
inline Rule *get_my_Rule(){
    Rule *S=add_rule("S"),*E=add_rule("E"),*L=add_rule("L");
    Rule *list=add_rule("("),*AND=add_rule(")"),*T=add_rule("T");
    S->add({list,E});
    S->add({list,L});
    L->add({E,L});
    L->add({E,AND,E});
    E->add({T});
    E->add({S});
    Rule *GAMMA=add_rule("GAMMA");//一定要有gamma rule當作是最上層的語法
    GAMMA->add({S});
    return GAMMA;
}
vector<string> ans;
bool dfs(node *o){
    if(o->child.empty()){
        if(o->s.r==NameRule["T"])ans.push_back(table[o->s.end].value);
        return 1;
    }
    bool is=o->s.r==NameRule["S"];
    if(is)ans.push_back("(");
    for(auto div:o->child){
        if(div.size()>1)return 0;
        if(!dfs(div[0]))return 0;
    }
    if(is)ans.push_back(")");
    return 1;
}
int n;
string s;
vector<string> tok;
int main(){
    init_Rule();
    Rule *GAMMA=get_my_Rule();
    cin.tie(0);
        ios::sync_with_stdio(0);
    cin>>n;
    cin.get();
    while(n--){
        getline(cin,s);
        State END=parse(GAMMA,lexer(s)).second;
        init_cache();
        node *root=build_tree(END);
        ans.clear();
        if(!dfs(root)){
            cout<<"AMBIGUOUS\n";
        }else{
            string pre;
            for(size_t i=0;i<ans.size();++i){
                if(pre!="("&&pre!=""&&ans[i]!=")")cout<<' ';
                cout<<(pre=ans[i]);
            }
            cout<<'\n';
        }
    }
    return 0;
}

沒有留言:

張貼留言