日月卦長的解題紀錄: [ 2007-2008 Summer Petrozavodsk Camp, Japanese Contest ] A. Reading Brackets in English

題目在這裡:http://codeforces.com/gym/100963

這題就是給你一個固定的語法(不是題目輸入的)，之後給你n個字串，你要判斷這個字串用給的語法去分析是否ambigous，如果不是就按題目要求輸出他的語法結構。

這裡我把給的語法轉化成形式文法，如下:
$
S \Longrightarrow list \; E \\
S \Longrightarrow list \; L \\
L \Longrightarrow E \; L \\
L \Longrightarrow E \; AND \; E \\
E \Longrightarrow T \\
E \Longrightarrow S
$
這裡的$list$就是原本的"a list of"、$AND$就是"and"、$T$就是所有的合法英文單字。之後利用CYK算法跟earley算法去分析並找出ambiguous。

首先把形式文法轉乘CNF表達式:
$
S \Longrightarrow W_1 \; S \\
S \Longrightarrow W_1 \; T \\
S \Longrightarrow W_1 \; L \\
L \Longrightarrow S \; L \\
L \Longrightarrow T \; L \\
L \Longrightarrow S \; L^{'} \\
L \Longrightarrow T \; L^{'} \\
L^{'} \Longrightarrow W_2 \; S \\
L^{'} \Longrightarrow W_2 \; T
$
這裡的$W_1$就是原本的"a list of"、$W_2$就是"and"、$T$就是所有的合法英文單字。
接著就可以跑CYK演算法了:

#include<bits/stdc++.h>
using namespace std;
enum P{//symbol
    S,L,LP,T,W1,W2,NUM
};// T="A"|"B"|"C"..., W1="a list of", w2="and" 
struct CNF{
    P s,a,b;// s->ab
    CNF(P s,P a,P b):s(s),a(a),b(b){}
};
vector<CNF> cnf;
inline void cnf_init(){
    cnf.push_back(CNF(S,W1,S));
    cnf.push_back(CNF(S,W1,T));
    cnf.push_back(CNF(S,W1,L));
    cnf.push_back(CNF(L,S,L));
    cnf.push_back(CNF(L,T,L));
    cnf.push_back(CNF(L,S,LP));
    cnf.push_back(CNF(L,T,LP));
    cnf.push_back(CNF(LP,W2,S));
    cnf.push_back(CNF(LP,W2,T));
}
vector<pair<string,P> > tok;
int dp[400][400][P::NUM];
bool amb[400][400][P::NUM];//ambiguous 
inline void CYK(){
    for(size_t i=0;i<tok.size();++i){
        memset(dp[i][i],0,sizeof(dp[i][i]));
        memset(amb[i][i],0,sizeof(amb[i][i]));
        dp[i][i][tok[i].second]=i+1;
    }
    for(int r=0;r<(int)tok.size();++r){
        for(int l=r-1;l>=0;--l){
            memset(dp[l][r],0,sizeof(dp[l][r]));
            memset(amb[l][r],0,sizeof(amb[l][r]));
            for(int k=l;k<r;++k){
                for(size_t i=0;i<cnf.size();++i){
                    if(dp[l][k][cnf[i].a]&&dp[k+1][r][cnf[i].b]){
                        if(dp[l][r][cnf[i].s]){
                            amb[l][r][cnf[i].s]=true;
                        }else{
                            dp[l][r][cnf[i].s]=k+1;
                            amb[l][r][cnf[i].s]=amb[l][k][cnf[i].a]||amb[k+1][r][cnf[i].b];
                        }
                    }
                }
            }
        }
    }
}
vector<string> ans;
void dfs(int l,int r,P type){
    if(l==r){
        if(type==P::T)ans.push_back(tok[l].first);
        return;
    }
    bool is=type==P::S;
    if(is)ans.push_back("(");
    int k=dp[l][r][type]-1;
    for(size_t i=0;i<cnf.size();++i){
        if(dp[l][k][cnf[i].a]&&dp[k+1][r][cnf[i].b]){
            dfs(l,k,cnf[i].a);
            dfs(k+1,r,cnf[i].b);
            break;
        }
    }
    if(is)ans.push_back(")");
}
int n;
string s;
int main(){
    cin.tie(0);
        ios::sync_with_stdio(0);
    cnf_init();
    cin>>n;
    cin.get();
    while(n--){
        getline(cin,s);
        replace(s.begin(),s.end(),',',' ');
        stringstream ss(s);
        tok.clear();
        while(ss>>s){
            if(s=="a"||s=="of")continue;
            if(s=="list"){
                tok.push_back(make_pair("(",P::W1));
            }else if(s=="and"){
                tok.push_back(make_pair(")",P::W2));
            }else tok.push_back(make_pair(s,P::T));
        }
        CYK();
        if(amb[0][tok.size()-1][P::S]){
            cout<<"AMBIGUOUS\n";
        }else{
            ans.clear();
            dfs(0,tok.size()-1,P::S);
            string pre;
            for(size_t i=0;i<ans.size();++i){
                if(pre!="("&&pre!=""&&ans[i]!=")")cout<<' ';
                cout<<(pre=ans[i]);
            }
            cout<<'\n';
        }
    }
    return 0;
}

第二種方式是用earlye直接做，因為earlye可以吃除了空字串之外的任何一種規則，所以可以直接建構語法自動機，我這裡是先建構完自動機後，再從他去建構語法樹:

#include<bits/stdc++.h>
using namespace std;
struct Rule{
    vector<vector<Rule*> > p;
    void add(const vector<Rule*> &l){
        p.push_back(l);
    }
};
map<string,Rule*> NameRule;
map<Rule*,string> RuleName;
inline void init_Rule(){
    for(auto r:RuleName)delete r.first;
    RuleName.clear();
    NameRule.clear();
}
inline Rule *add_rule(const string &s){
    if(NameRule.find(s)!=NameRule.end())return NameRule[s];
    Rule *r=new Rule();
    RuleName[r]=s;
    NameRule[s]=r;
    return r;
}
typedef vector<Rule*> production;
struct State{
    Rule *r;
    int rid,dot_id,start,end;
    State(Rule *r,int rid,int dot,int start):r(r),rid(rid),dot_id(dot),start(start),end(-1){}
    State(Rule *r=0,int col=0):r(r),rid(-1),dot_id(-1),start(-1),end(col){}
    bool completed()const{
        return rid==-1||dot_id>=(int)r->p[rid].size();
    }
    Rule *next_term()const{
        if(completed())return 0;
        return r->p[rid][dot_id];
    }
    bool operator<(const State& b)const{
        if(start!=b.start)return start<b.start;
        if(dot_id!=b.dot_id)return dot_id<b.dot_id;
        if(r!=b.r)return r<b.r;
        return rid<b.rid;
    }
    void print()const{
        cout<<RuleName[r]<<"->";
        if(rid!=-1)for(size_t i=0;;++i){
            if((int)i==dot_id)cout<<" "<<"$";
            if(i>=r->p[rid].size())break;
            cout<<" "<<RuleName[r->p[rid][i]];
        }
        cout<<" "<<"["<<start<<","<<end<<"]"<<endl;
    }
};
struct Column{
    Rule *term;
    string value;
    vector<State> s;
    map<State,set<pair<State,State> > > div;
    //div比較像一棵 左兄右子的樹
    Column(Rule *r,const string &s):term(r),value(s){}
    Column(){}
    bool add(const State &st,int col){
        if(div.find(st)==div.end()){
            div[st];
            s.push_back(st);
            s.back().end=col;
            return true;
        }else return false;
    }
};
inline vector<Column> lexer(string text){
    //tokenize，要自己寫
    //他會把 input stream 變成 token stream，就是(terminal,value)pair
    vector<Column> token;
    replace(text.begin(),text.end(),',',' ');
    stringstream ss(text);
    while(ss>>text){
        if(text=="a"||text=="of")continue;
        if(text=="list"){
            token.push_back(Column(NameRule["("],"("));
        }else if(text=="and"){
            token.push_back(Column(NameRule[")"],")"));
        }else token.push_back(Column(NameRule["T"],text));
    }
    return token;
}
vector<Column> table;
inline void predict(int col,Rule *rul){
    for(size_t i=0;i<rul->p.size();++i){
        table[col].add(State(rul,i,0,col),col);
    }
}
inline void scan(int col,const State &s,Rule *r){
    if(r!=table[col].term)return;
    State ns(s.r,s.rid,s.dot_id+1,s.start);
    table[col].add(ns,col);
    table[col].div[ns].insert(make_pair(s,State(r,col)));
}
inline void complete(int col,const State &s){
    for(size_t i=0;i<table[s.start].s.size();++i){
        State &st=table[s.start].s[i];
        Rule *term=st.next_term();
        if(!term||term->p.size()==0)continue;
        if(term==s.r){
            State nst(st.r,st.rid,st.dot_id+1,st.start);
            table[col].add(nst,col);
            table[col].div[nst].insert(make_pair(st,s));
        }
    }
}
inline pair<bool,State> parse(Rule *GAMMA,const vector<Column > &token){
    table.resize(token.size()+1);
    for(size_t i=0;i<token.size();++i){
        table[i+1]=Column(token[i]);
    }
    table[0]=Column();
    table[0].add(State(GAMMA,0,0,0),0);
    for(size_t i=0;i<table.size();++i){
        for(size_t j=0;j<table[i].s.size();++j){
            State state=table[i].s[j];
            if(state.completed()){
                complete(i,state);
            }else{
                Rule *term=state.next_term();
                if(term->p.size()){
                    predict(i,term);
                }else if(i+1<table.size()){
                    scan(i+1,state,term);
                }
            }
        }
    }
    for(size_t i=0;i<table.back().s.size();++i){
        if(table.back().s[i].r==GAMMA&&table.back().s[i].completed()){
            return make_pair(true,table.back().s[i]);
        }
    }
    return make_pair(false,State(0,-1));
}
struct node{//語法樹的節點
    State s;
    vector<vector<node*> > child;//vector<node*>.size()>1表示ambiguous
    node(const State &s):s(s){}
    node(){}
};
struct State_end_cmp{
    bool operator()(const State &a,const State &b)const{
        return a.end<b.end||(a.end==b.end&&a<b);
    }
};
map<State,node*,State_end_cmp> cache;
vector<node*> node_set;
inline void init_cache(){
    for(auto d:node_set)delete d;
    cache.clear();
    node_set.clear();
}
void _build_tree(const State &s,node *pa,bool amb=0){
    if(cache.find(s)!=cache.end()){
        pa->child.push_back(vector<node*>(1,cache[s]));
        return;
    }
    node *o;
    if(s.completed()){
        o=new node(s);
        if(amb)pa->child.back().push_back(o);
        else pa->child.push_back(vector<node*>(1,o));
    }else o=pa->child.back().back();
    amb=0;
    for(auto div:table[s.end].div[s]){
        if(!amb)_build_tree(div.first,pa);
        _build_tree(div.second,o,amb);
        amb=1;
    }
    if(s.completed())cache[s]=o;
}
inline node *build_tree(const State &s){
    init_cache();
    node o;
    _build_tree(s,&o);
    assert(o.child.size()==1);
    assert(o.child.back().size()==1);
    return o.child.back().back();
}
void print_tree(node *o,int dep=0){
    cout<<string(dep,' '),o->s.print();
    for(auto div:o->child){
        for(auto nd:div){
            print_tree(nd,dep+2);
        }
    }
}
//開始寫code 
inline Rule *get_my_Rule(){
    Rule *S=add_rule("S"),*E=add_rule("E"),*L=add_rule("L");
    Rule *list=add_rule("("),*AND=add_rule(")"),*T=add_rule("T");
    S->add({list,E});
    S->add({list,L});
    L->add({E,L});
    L->add({E,AND,E});
    E->add({T});
    E->add({S});
    Rule *GAMMA=add_rule("GAMMA");//一定要有gamma rule當作是最上層的語法
    GAMMA->add({S});
    return GAMMA;
}
vector<string> ans;
bool dfs(node *o){
    if(o->child.empty()){
        if(o->s.r==NameRule["T"])ans.push_back(table[o->s.end].value);
        return 1;
    }
    bool is=o->s.r==NameRule["S"];
    if(is)ans.push_back("(");
    for(auto div:o->child){
        if(div.size()>1)return 0;
        if(!dfs(div[0]))return 0;
    }
    if(is)ans.push_back(")");
    return 1;
}
int n;
string s;
vector<string> tok;
int main(){
    init_Rule();
    Rule *GAMMA=get_my_Rule();
    cin.tie(0);
        ios::sync_with_stdio(0);
    cin>>n;
    cin.get();
    while(n--){
        getline(cin,s);
        State END=parse(GAMMA,lexer(s)).second;
        init_cache();
        node *root=build_tree(END);
        ans.clear();
        if(!dfs(root)){
            cout<<"AMBIGUOUS\n";
        }else{
            string pre;
            for(size_t i=0;i<ans.size();++i){
                if(pre!="("&&pre!=""&&ans[i]!=")")cout<<' ';
                cout<<(pre=ans[i]);
            }
            cout<<'\n';
        }
    }
    return 0;
}

日月卦長的解題紀錄

latex預處理

2016年10月27日星期四

[ 2007-2008 Summer Petrozavodsk Camp, Japanese Contest ] A. Reading Brackets in English

沒有留言:

張貼留言

latex預處理

2016年10月27日 星期四

[ 2007-2008 Summer Petrozavodsk Camp, Japanese Contest ] A. Reading Brackets in English

沒有留言:

張貼留言

2016年10月27日星期四