00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023 #ifndef FST_LIB_COMPACT_FST_H__
00024 #define FST_LIB_COMPACT_FST_H__
00025
00026 #include <iterator>
00027 #include <utility>
00028 using std::pair; using std::make_pair;
00029 #include <vector>
00030 using std::vector;
00031
00032 #include <fst/cache.h>
00033 #include <fst/expanded-fst.h>
00034 #include <fst/fst-decl.h>
00035 #include <fst/matcher.h>
00036 #include <fst/test-properties.h>
00037 #include <fst/util.h>
00038
00039 namespace fst {
00040
00041 struct CompactFstOptions : public CacheOptions {
00042
00043
00044
00045 CompactFstOptions() : CacheOptions(true, 0) {}
00046 CompactFstOptions(const CacheOptions &opts) : CacheOptions(opts) {}
00047 };
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070
00071
00072
00073
00074
00075
00076
00077
00078
00079
00080
00081
00082
00083
00084
00085
00086
00087
00088
00089
00090
00091
00092
00093
00094
00095
00096
00097
00098
00099
00100
00101
00102
00103
00104
00105
00106
00107
00108
00109
00110
00111
00112
00113
00114
00115
00116
00117
00118
00119
00120
00121
00122
00123
00124
00125
00126
00127
00128
00129 template <class A, class C, class U>
00130 class CompactFstData {
00131 public:
00132 typedef A Arc;
00133 typedef typename A::Weight Weight;
00134 typedef typename A::StateId StateId;
00135 typedef C Compactor;
00136 typedef typename C::Element CompactElement;
00137 typedef U Unsigned;
00138
00139 CompactFstData()
00140 : states_(0),
00141 compacts_(0),
00142 nstates_(0),
00143 ncompacts_(0),
00144 narcs_(0),
00145 start_(kNoStateId) {}
00146
00147 CompactFstData(const Fst<A> &fst, const Compactor &compactor);
00148
00149 template <class Iterator>
00150 CompactFstData(const Iterator &begin, const Iterator &end,
00151 const Compactor &compactor);
00152
00153 ~CompactFstData() {
00154 delete[] states_;
00155 delete[] compacts_;
00156 }
00157
00158 static CompactFstData<A, C, U> *Read(istream &strm,
00159 const FstReadOptions &opts,
00160 const FstHeader &hdr,
00161 const Compactor &compactor);
00162
00163 bool Write(ostream &strm, const FstWriteOptions &opts,
00164 const C &compactor) const;
00165
00166 Unsigned States(StateId i) const { return states_[i]; }
00167 const CompactElement &Compacts(size_t i) const { return compacts_[i]; }
00168 StateId NumStates() const { return nstates_; }
00169 size_t NumCompacts() const { return ncompacts_; }
00170 size_t NumArcs() const { return narcs_; }
00171 StateId Start() const { return start_; }
00172
00173 int RefCount() const { return ref_count_.count(); }
00174 int IncrRefCount() { return ref_count_.Incr(); }
00175 int DecrRefCount() { return ref_count_.Decr(); }
00176
00177 private:
00178
00179 static const int kFileAlign = 16;
00180
00181 Unsigned *states_;
00182 CompactElement *compacts_;
00183 StateId nstates_;
00184 size_t ncompacts_;
00185 size_t narcs_;
00186 StateId start_;
00187 RefCounter ref_count_;
00188 };
00189
00190 template <class A, class C, class U>
00191 const int CompactFstData<A, C, U>::kFileAlign;
00192
00193
00194 template <class A, class C, class U>
00195 CompactFstData<A, C, U>::CompactFstData(const Fst<A> &fst,
00196 const C &compactor)
00197 : states_(0),
00198 compacts_(0),
00199 nstates_(0),
00200 ncompacts_(0),
00201 narcs_(0),
00202 start_(kNoStateId) {
00203 start_ = fst.Start();
00204
00205 StateId nfinals = 0;
00206 for (StateIterator< Fst<A> > siter(fst);
00207 !siter.Done();
00208 siter.Next()) {
00209 ++nstates_;
00210 StateId s = siter.Value();
00211 for (ArcIterator< Fst<A> > aiter(fst, s);
00212 !aiter.Done();
00213 aiter.Next())
00214 ++narcs_;
00215 if (fst.Final(s) != Weight::Zero()) ++nfinals;
00216 }
00217 if (compactor.Size() == -1) {
00218 states_ = new Unsigned[nstates_ + 1];
00219 ncompacts_ = narcs_ + nfinals;
00220 compacts_ = new CompactElement[ncompacts_];
00221 states_[nstates_] = ncompacts_;
00222 } else {
00223 states_ = 0;
00224 ncompacts_ = nstates_ * compactor.Size();
00225 if ((narcs_ + nfinals) != ncompacts_)
00226 LOG(FATAL) << "CompactFstData: compactor incompatible with fst";
00227 compacts_ = new CompactElement[ncompacts_];
00228 }
00229 size_t pos = 0, fpos = 0;
00230 for (StateId s = 0; s < nstates_; ++s) {
00231 fpos = pos;
00232 if (compactor.Size() == -1)
00233 states_[s] = pos;
00234 if (fst.Final(s) != Weight::Zero())
00235 compacts_[pos++] = compactor.Compact(s, Arc(kNoLabel, kNoLabel,
00236 fst.Final(s), kNoStateId));
00237 for (ArcIterator< Fst<A> > aiter(fst, s);
00238 !aiter.Done();
00239 aiter.Next())
00240 compacts_[pos++] = compactor.Compact(s, aiter.Value());
00241 if ((compactor.Size() != -1) && ((pos - fpos) != compactor.Size()))
00242 LOG(FATAL) << "CompactFstData: compactor incompatible with fst";
00243 }
00244 if (pos != ncompacts_)
00245 LOG(FATAL) << "CompactFstData: compactor incompatible with fst";
00246 }
00247
00248 template <class A, class C, class U>
00249 template <class Iterator>
00250 CompactFstData<A, C, U>::CompactFstData(const Iterator &begin,
00251 const Iterator &end,
00252 const C &compactor)
00253 : states_(0),
00254 compacts_(0),
00255 nstates_(0),
00256 ncompacts_(0),
00257 narcs_(0),
00258 start_(kNoStateId) {
00259
00260 if (compactor.Size() != -1) {
00261 ncompacts_ = distance(begin, end);
00262 if (compactor.Size() == 1) {
00263 if (ncompacts_ == 0) {
00264 ++ncompacts_;
00265 } else {
00266 A arc = compactor.Expand(ncompacts_ - 1,
00267 *(begin + (ncompacts_ - 1)));
00268 if (arc.ilabel != kNoLabel)
00269 ++ncompacts_;
00270 }
00271 }
00272 if (ncompacts_ % compactor.Size())
00273 LOG(FATAL) << "CompactFstData: size of input container incompatible"
00274 << " with compactor";
00275 if (ncompacts_ == 0)
00276 return;
00277 start_ = 0;
00278 nstates_ = ncompacts_ / compactor.Size();
00279 compacts_ = new CompactElement[ncompacts_];
00280 size_t i = 0;
00281 Iterator it = begin;
00282 for(; it != end; ++it, ++i){
00283 compacts_[i] = *it;
00284 if (compactor.Expand(i, *it).ilabel != kNoLabel)
00285 ++narcs_;
00286 }
00287 if (i < ncompacts_)
00288 compacts_[i] = compactor.Compact(i, A(kNoLabel, kNoLabel,
00289 Weight::One(), kNoStateId));
00290 } else {
00291 if (distance(begin, end) == 0)
00292 return;
00293
00294 Iterator it = begin;
00295 for(size_t i = 0; it != end; ++it, ++i) {
00296 A arc = compactor.Expand(i, *it);
00297 if (arc.ilabel != kNoLabel) {
00298 ++narcs_;
00299 ++ncompacts_;
00300 } else {
00301 ++nstates_;
00302 if (arc.weight != Weight::Zero())
00303 ++ncompacts_;
00304 }
00305 }
00306 start_ = 0;
00307 compacts_ = new CompactElement[ncompacts_];
00308 states_ = new Unsigned[nstates_ + 1];
00309 states_[nstates_] = ncompacts_;
00310 size_t i = 0, s = 0;
00311 for(it = begin; it != end; ++it) {
00312 A arc = compactor.Expand(i, *it);
00313 if (arc.ilabel != kNoLabel) {
00314 compacts_[i++] = *it;
00315 } else {
00316 states_[s++] = i;
00317 if (arc.weight != Weight::Zero())
00318 compacts_[i++] = *it;
00319 }
00320 }
00321 if ((s != nstates_) || (i != ncompacts_))
00322 LOG(FATAL) << "CompactFstData: ill-formed input container";
00323 }
00324 }
00325
00326 template<class A, class C, class U>
00327 CompactFstData<A, C, U> *CompactFstData<A, C, U>::Read(
00328 istream &strm,
00329 const FstReadOptions &opts,
00330 const FstHeader &hdr,
00331 const C &compactor) {
00332 CompactFstData<A, C, U> *data = new CompactFstData<A, C, U>();
00333 data->start_ = hdr.Start();
00334 data->nstates_ = hdr.NumStates();
00335 data->narcs_ = hdr.NumArcs();
00336
00337 if (compactor.Size() == -1) {
00338 data->states_ = new Unsigned[data->nstates_ + 1];
00339 char c;
00340 for (int i = 0; i < kFileAlign && strm.tellg() % kFileAlign; ++i)
00341 strm.read(&c, 1);
00342
00343 size_t b = (data->nstates_ + 1) * sizeof(Unsigned);
00344 strm.read(reinterpret_cast<char *>(data->states_), b);
00345 if (!strm) {
00346 LOG(ERROR) << "CompactFst::Read: Read failed: " << opts.source;
00347 return 0;
00348 }
00349 } else {
00350 data->states_ = 0;
00351 }
00352 data->ncompacts_ = compactor.Size() == -1
00353 ? data->states_[data->nstates_]
00354 : data->nstates_ * compactor.Size();
00355 data->compacts_ = new CompactElement[data->ncompacts_];
00356
00357 char c;
00358 size_t b = data->ncompacts_ * sizeof(CompactElement);
00359 for (int i = 0; i < kFileAlign && strm.tellg() % kFileAlign; ++i)
00360 strm.read(&c, 1);
00361 strm.read(reinterpret_cast<char *>(data->compacts_), b);
00362 if (!strm) {
00363 LOG(ERROR) << "CompactFst::Read: Read failed: " << opts.source;
00364 return 0;
00365 }
00366 return data;
00367 }
00368
00369 template<class A, class C, class U>
00370 bool CompactFstData<A, C, U>::Write(ostream &strm,
00371 const FstWriteOptions &opts,
00372 const C &compactor) const {
00373 if (states_) {
00374 for (int i = 0; i < kFileAlign && strm.tellp() % kFileAlign; ++i)
00375 strm.write("", 1);
00376 strm.write(reinterpret_cast<char *>(states_),
00377 (nstates_ + 1) * sizeof(Unsigned));
00378 }
00379 for (int i = 0; i < kFileAlign && strm.tellp() % kFileAlign; ++i)
00380 strm.write("", 1);
00381 strm.write(reinterpret_cast<char *>(compacts_),
00382 ncompacts_ * sizeof(CompactElement));
00383
00384 strm.flush();
00385 if (!strm) {
00386 LOG(ERROR) << "CompactFst::Write: Write failed: " << opts.source;
00387 return false;
00388 }
00389 return true;
00390 }
00391
00392 template <class A, class C, class U> class CompactFst;
00393 template <class F, class G> void Cast(const F &, G *);
00394
00395
00396
00397 template <class A, class C, class U>
00398 class CompactFstImpl : public CacheImpl<A> {
00399 public:
00400 using FstImpl<A>::SetType;
00401 using FstImpl<A>::SetProperties;
00402 using FstImpl<A>::Properties;
00403 using FstImpl<A>::SetInputSymbols;
00404 using FstImpl<A>::SetOutputSymbols;
00405 using FstImpl<A>::WriteHeader;
00406
00407 using VectorFstBaseImpl<typename CacheImpl<A>::State>::NumStates;
00408
00409 using CacheImpl<A>::AddArc;
00410 using CacheImpl<A>::HasArcs;
00411 using CacheImpl<A>::HasFinal;
00412 using CacheImpl<A>::HasStart;
00413 using CacheImpl<A>::SetArcs;
00414 using CacheImpl<A>::SetFinal;
00415 using CacheImpl<A>::SetStart;
00416
00417 friend class StateIterator< CompactFst<A, C, U> >;
00418
00419 typedef A Arc;
00420 typedef typename A::Weight Weight;
00421 typedef typename A::StateId StateId;
00422 typedef C Compactor;
00423 typedef typename C::Element CompactElement;
00424 typedef U Unsigned;
00425
00426 CompactFstImpl()
00427 : CacheImpl<A>(CompactFstOptions()),
00428 compactor_(0),
00429 own_compactor_(false),
00430 data_(0) {
00431 string type = "compact";
00432 if (sizeof(U) != sizeof(uint32)) {
00433 string size;
00434 Int64ToStr(8 * sizeof(U), &size);
00435 type += size;
00436 }
00437 type += "_";
00438 type += C::Type();
00439 SetType(type);
00440 SetProperties(kNullProperties | kStaticProperties);
00441 }
00442
00443 CompactFstImpl(const Fst<Arc> &fst, const C &compactor,
00444 const CompactFstOptions &opts)
00445 : CacheImpl<A>(opts),
00446 compactor_(new C(compactor)),
00447 own_compactor_(true),
00448 data_(0) {
00449 Init(fst);
00450 }
00451
00452 CompactFstImpl(const Fst<Arc> &fst, C *compactor,
00453 const CompactFstOptions &opts)
00454 : CacheImpl<A>(opts),
00455 compactor_(compactor),
00456 own_compactor_(false),
00457 data_(0) {
00458 Init(fst);
00459 }
00460
00461 template <class Iterator>
00462 CompactFstImpl(const Iterator &b, const Iterator &e, const C &compactor,
00463 const CompactFstOptions &opts)
00464 : CacheImpl<A>(opts),
00465 compactor_(new C(compactor)),
00466 own_compactor_(true),
00467 data_(0) {
00468 Init(b, e);
00469 }
00470
00471 template <class Iterator>
00472 CompactFstImpl(const Iterator &b, const Iterator &e, C *compactor,
00473 const CompactFstOptions &opts)
00474 : CacheImpl<A>(opts),
00475 compactor_(compactor),
00476 own_compactor_(false),
00477 data_(0) {
00478 Init(b, e);
00479 }
00480
00481 CompactFstImpl(const CompactFstImpl<A, C, U> &impl)
00482 : CacheImpl<A>(impl),
00483 compactor_(new C(*impl.compactor_)),
00484 own_compactor_(true),
00485 data_(impl.data_) {
00486 if (data_)
00487 data_->IncrRefCount();
00488 SetType(impl.Type());
00489 SetProperties(impl.Properties());
00490 SetInputSymbols(impl.InputSymbols());
00491 SetOutputSymbols(impl.OutputSymbols());
00492 }
00493
00494 ~CompactFstImpl(){
00495 if (own_compactor_)
00496 delete compactor_;
00497 if (data_ && !data_->DecrRefCount())
00498 delete data_;
00499 }
00500
00501 StateId Start() {
00502 if (!HasStart()) {
00503 SetStart(data_->Start());
00504 }
00505 return CacheImpl<A>::Start();
00506 }
00507
00508 Weight Final(StateId s) {
00509 if (!HasFinal(s)) {
00510 Arc arc(kNoLabel, kNoLabel, Weight::Zero(), kNoStateId);
00511 if ((compactor_->Size() != -1) ||
00512 (data_->States(s) != data_->States(s + 1)))
00513 arc = ComputeArc(s,
00514 compactor_->Size() == -1
00515 ? data_->States(s)
00516 : s * compactor_->Size());
00517 SetFinal(s, arc.ilabel == kNoLabel ? arc.weight : Weight::Zero());
00518 }
00519 return CacheImpl<A>::Final(s);
00520 }
00521
00522 StateId NumStates() const { return data_->NumStates();}
00523
00524 size_t NumArcs(StateId s) {
00525 if (HasArcs(s))
00526 return CacheImpl<A>::NumArcs(s);
00527 Unsigned i, num_arcs;
00528 if (compactor_->Size() == -1) {
00529 i = data_->States(s);
00530 num_arcs = data_->States(s + 1) - i;
00531 } else {
00532 i = s * compactor_->Size();
00533 num_arcs = compactor_->Size();
00534 }
00535 if (num_arcs > 0) {
00536 const A &arc = ComputeArc(s, i, kArcILabelValue);
00537 if (arc.ilabel == kNoStateId) {
00538 --num_arcs;
00539 }
00540 }
00541 return num_arcs;
00542 }
00543
00544 size_t NumInputEpsilons(StateId s) {
00545 if (!HasArcs(s) && !Properties(kILabelSorted))
00546 Expand(s);
00547 if (HasArcs(s))
00548 return CacheImpl<A>::NumInputEpsilons(s);
00549 return CountEpsilons(s, false);
00550 }
00551
00552 size_t NumOutputEpsilons(StateId s) {
00553 if (!HasArcs(s) && !Properties(kOLabelSorted))
00554 Expand(s);
00555 if (HasArcs(s))
00556 return CacheImpl<A>::NumOutputEpsilons(s);
00557 return CountEpsilons(s, true);
00558 }
00559
00560 size_t CountEpsilons(StateId s, bool output_epsilons) {
00561 CHECK((!output_epsilons && Properties(kILabelSorted)) ||
00562 (output_epsilons && Properties(kOLabelSorted)));
00563 size_t begin = compactor_->Size() == -1 ?
00564 data_->States(s) : s * compactor_->Size();
00565 size_t end = compactor_->Size() == -1 ?
00566 data_->States(s + 1) : (s + 1) * compactor_->Size();
00567 size_t num_eps = 0;
00568 for (size_t i = begin; i < end; ++i) {
00569 const A &arc = ComputeArc(
00570 s, i, output_epsilons ? kArcOLabelValue : kArcILabelValue);
00571 const typename A::Label &label =
00572 (output_epsilons ? arc.olabel : arc.ilabel);
00573 if (label == kNoLabel)
00574 continue;
00575 else if (label > 0)
00576 break;
00577 ++num_eps;
00578 }
00579 return num_eps;
00580 }
00581
00582 static CompactFstImpl<A, C, U> *Read(istream &strm,
00583 const FstReadOptions &opts) {
00584 CompactFstImpl<A, C, U> *impl = new CompactFstImpl<A, C, U>();
00585 FstHeader hdr;
00586 if (!impl->ReadHeader(strm, opts, kMinFileVersion, &hdr))
00587 return 0;
00588
00589 impl->compactor_ = C::Read(strm);
00590 impl->own_compactor_ = true;
00591 impl->data_ = CompactFstData<A, C, U>::Read(strm, opts, hdr,
00592 *impl->compactor_);
00593 return impl;
00594 }
00595
00596 bool Write(ostream &strm, const FstWriteOptions &opts) const {
00597 FstHeader hdr;
00598 hdr.SetStart(data_->Start());
00599 hdr.SetNumStates(data_->NumStates());
00600 hdr.SetNumArcs(data_->NumArcs());
00601 WriteHeader(strm, opts, kFileVersion, &hdr);
00602 compactor_->Write(strm);
00603 return data_->Write(strm, opts, *compactor_);
00604 }
00605
00606
00607 void InitStateIterator(StateIteratorData<A> *data) const {
00608 data->base = 0;
00609 data->nstates = data_->NumStates();
00610 }
00611
00612 void InitArcIterator(StateId s, ArcIteratorData<A> *data) {
00613 if (!HasArcs(s))
00614 Expand(s);
00615 CacheImpl<A>::InitArcIterator(s, data);
00616 }
00617
00618 Arc ComputeArc(StateId s, Unsigned i, uint32 f = kArcValueFlags) const {
00619 return compactor_->Expand(s, data_->Compacts(i), f);
00620 }
00621
00622 void Expand(StateId s) {
00623 size_t begin = compactor_->Size() == -1 ?
00624 data_->States(s) : s * compactor_->Size();
00625 size_t end = compactor_->Size() == -1 ?
00626 data_->States(s + 1) : (s + 1) * compactor_->Size();
00627 for (size_t i = begin; i < end; ++i) {
00628 const Arc &arc = ComputeArc(s, i);
00629 if (arc.ilabel == kNoLabel) continue;
00630 AddArc(s, arc);
00631 }
00632 SetArcs(s);
00633 }
00634
00635 template <class Iterator>
00636 void SetCompactElements(const Iterator &b, const Iterator &e) {
00637 if (data_ && !data_->DecrRefCount())
00638 delete data_;
00639 data_ = new CompactFstData<A, C, U>(b, e, *compactor_);
00640 }
00641
00642 C *GetCompactor() const { return compactor_; }
00643 CompactFstData<A, C, U> *Data() const { return data_; }
00644
00645 private:
00646 void Init(const Fst<Arc> &fst) {
00647 string type = "compact";
00648 if (sizeof(U) != sizeof(uint32)) {
00649 string size;
00650 Int64ToStr(8 * sizeof(U), &size);
00651 type += size;
00652 }
00653 type += "_";
00654 type += compactor_->Type();
00655 SetType(type);
00656 uint64 copy_properties = fst.Properties(kCopyProperties, true);
00657 if (!compactor_->Compatible(fst))
00658 LOG(FATAL) << "CompactFstImpl: input fst incompatible with compactor";
00659 SetProperties(copy_properties | kStaticProperties);
00660 SetInputSymbols(fst.InputSymbols());
00661 SetOutputSymbols(fst.OutputSymbols());
00662 data_ = new CompactFstData<A, C, U>(fst, *compactor_);
00663 }
00664
00665 template <class Iterator>
00666 void Init(const Iterator &b, const Iterator &e) {
00667 string type = "compact";
00668 if (sizeof(U) != sizeof(uint32)) {
00669 string size;
00670 Int64ToStr(8 * sizeof(U), &size);
00671 type += size;
00672 }
00673 type += "_";
00674 type += compactor_->Type();
00675 SetType(type);
00676 SetProperties(kStaticProperties | compactor_->Properties());
00677 data_ = new CompactFstData<A, C, U>(b, e, *compactor_);
00678 }
00679
00680
00681 static const uint64 kStaticProperties = kExpanded;
00682
00683 static const int kFileVersion = 1;
00684
00685 static const int kMinFileVersion = 1;
00686
00687 C *compactor_;
00688 bool own_compactor_;
00689 CompactFstData<A, C, U> *data_;
00690 };
00691
00692 template <class A, class C, class U>
00693 const uint64 CompactFstImpl<A, C, U>::kStaticProperties;
00694 template <class A, class C, class U>
00695 const int CompactFstImpl<A, C, U>::kFileVersion;
00696 template <class A, class C, class U>
00697 const int CompactFstImpl<A, C, U>::kMinFileVersion;
00698
00699
00700
00701
00702
00703
00704
00705 template <class A, class C, class U>
00706 class CompactFst : public ImplToExpandedFst< CompactFstImpl<A, C, U> > {
00707 public:
00708 friend class StateIterator< CompactFst<A, C, U> >;
00709 friend class ArcIterator< CompactFst<A, C, U> >;
00710 template <class F, class G> void friend Cast(const F &, G *);
00711
00712 typedef A Arc;
00713 typedef typename A::StateId StateId;
00714 typedef CompactFstImpl<A, C, U> Impl;
00715 typedef CacheState<A> State;
00716 typedef U Unsigned;
00717
00718 CompactFst() : ImplToExpandedFst<Impl>(new Impl()) {}
00719
00720 explicit CompactFst(const Fst<A> &fst, const C &compactor = C(),
00721 const CompactFstOptions &opts = CompactFstOptions())
00722 : ImplToExpandedFst<Impl>(new Impl(fst, compactor, opts)) {}
00723
00724 CompactFst(const Fst<A> &fst, C *compactor,
00725 const CompactFstOptions &opts = CompactFstOptions())
00726 : ImplToExpandedFst<Impl>(new Impl(fst, compactor, opts)) {}
00727
00728
00729
00730
00731
00732
00733
00734
00735
00736
00737
00738
00739
00740
00741
00742
00743
00744
00745
00746
00747 template <class Iterator>
00748 explicit CompactFst(const Iterator &begin, const Iterator &end,
00749 const C &compactor = C(),
00750 const CompactFstOptions &opts = CompactFstOptions())
00751 : ImplToExpandedFst<Impl>(new Impl(begin, end, compactor, opts)) {}
00752
00753 template <class Iterator>
00754 CompactFst(const Iterator &begin, const Iterator &end,
00755 C *compactor, const CompactFstOptions &opts = CompactFstOptions())
00756 : ImplToExpandedFst<Impl>(new Impl(begin, end, compactor, opts)) {}
00757
00758
00759 CompactFst(const CompactFst<A, C, U> &fst, bool safe = false)
00760 : ImplToExpandedFst<Impl>(fst, safe) {}
00761
00762
00763 virtual CompactFst<A, C, U> *Copy(bool safe = false) const {
00764 return new CompactFst<A, C, U>(*this, safe);
00765 }
00766
00767
00768 static CompactFst<A, C, U> *Read(istream &strm, const FstReadOptions &opts) {
00769 Impl* impl = Impl::Read(strm, opts);
00770 return impl ? new CompactFst<A, C, U>(impl) : 0;
00771 }
00772
00773
00774
00775 static CompactFst<A, C, U> *Read(const string &filename) {
00776 Impl* impl = ImplToExpandedFst<Impl>::Read(filename);
00777 return impl ? new CompactFst<A, C, U>(impl) : 0;
00778 }
00779
00780 virtual void InitStateIterator(StateIteratorData<A> *data) const {
00781 GetImpl()->InitStateIterator(data);
00782 }
00783
00784 virtual void InitArcIterator(StateId s, ArcIteratorData<A> *data) const {
00785 GetImpl()->InitArcIterator(s, data);
00786 }
00787
00788 virtual MatcherBase<A> *InitMatcher(MatchType match_type) const {
00789 return new SortedMatcher<CompactFst<A, C, U> >(*this, match_type);
00790 }
00791
00792 template <class Iterator>
00793 void SetCompactElements(const Iterator &b, const Iterator &e) {
00794 GetImpl()->SetCompactElements(b, e);
00795 }
00796
00797 private:
00798 CompactFst(Impl *impl) : ImplToExpandedFst<Impl>(impl) {}
00799
00800
00801 Impl *GetImpl() const { return ImplToFst<Impl, ExpandedFst<A> >::GetImpl(); }
00802
00803 void SetImpl(Impl *impl, bool own_impl = false) {
00804 ImplToFst< Impl, ExpandedFst<A> >::SetImpl(impl, own_impl);
00805 }
00806
00807 void operator=(const CompactFst<A, C, U> &fst);
00808 };
00809
00810
00811
00812
00813
00814 template <class A, class C, class U>
00815 class StateIterator< CompactFst<A, C, U> > {
00816 public:
00817 typedef typename A::StateId StateId;
00818
00819 explicit StateIterator(const CompactFst<A, C, U> &fst)
00820 : nstates_(fst.GetImpl()->NumStates()), s_(0) {}
00821
00822 bool Done() const { return s_ >= nstates_; }
00823
00824 StateId Value() const { return s_; }
00825
00826 void Next() { ++s_; }
00827
00828 void Reset() { s_ = 0; }
00829
00830 private:
00831 StateId nstates_;
00832 StateId s_;
00833
00834 DISALLOW_COPY_AND_ASSIGN(StateIterator);
00835 };
00836
00837
00838
00839 template <class A, class C, class U>
00840 class ArcIterator< CompactFst<A, C, U> > {
00841 public:
00842 typedef typename A::StateId StateId;
00843 typedef typename C::Element CompactElement;
00844
00845 ArcIterator(const CompactFst<A, C, U> &fst, StateId s)
00846 : compactor_(fst.GetImpl()->GetCompactor()), state_(s), compacts_(0),
00847 pos_(0), flags_(kArcValueFlags) {
00848
00849 const CompactFstData<A, C, U> *data = fst.GetImpl()->Data();
00850 size_t offset;
00851 if (compactor_->Size() == -1) {
00852 offset = data->States(s);
00853 num_arcs_ = data->States(s + 1) - offset;
00854 } else {
00855 offset = s * compactor_->Size();
00856 num_arcs_ = compactor_->Size();
00857 }
00858 if (num_arcs_ > 0) {
00859 compacts_ = &(data->Compacts(offset));
00860 arc_ = compactor_->Expand(s, *compacts_, kArcILabelValue);
00861 if (arc_.ilabel == kNoStateId) {
00862 ++compacts_;
00863 --num_arcs_;
00864 }
00865 }
00866 }
00867
00868 ~ArcIterator() {}
00869
00870 bool Done() const { return pos_ >= num_arcs_; }
00871
00872 const A& Value() const {
00873 arc_ = compactor_->Expand(state_, compacts_[pos_], flags_);
00874 return arc_;
00875 }
00876
00877 void Next() { ++pos_; }
00878
00879 size_t Position() const { return pos_; }
00880
00881 void Reset() { pos_ = 0; }
00882
00883 void Seek(size_t pos) { pos_ = pos; }
00884
00885 uint32 Flags() const { return flags_; }
00886
00887 void SetFlags(uint32 f, uint32 m) {
00888 flags_ &= ~m;
00889 flags_ |= (f & kArcValueFlags);
00890 }
00891
00892 private:
00893 C *compactor_;
00894 StateId state_;
00895 const CompactElement *compacts_;
00896 size_t pos_;
00897 size_t num_arcs_;
00898 mutable A arc_;
00899 uint32 flags_;
00900
00901 DISALLOW_COPY_AND_ASSIGN(ArcIterator);
00902 };
00903
00904
00905
00906
00907
00908
00909
00910
00911
00912
00913
00914
00915
00916
00917
00918
00919
00920
00921
00922
00923
00924
00925
00926
00927
00928
00929
00930
00931
00932
00933
00934
00935
00936
00937
00938
00939
00940
00941
00942
00943
00944
00945
00946
00947
00948
00949
00950
00951
00952
00953
00954
00955
00956
00957
00958
00959
00960
00961
00962
00963
00964
00965
00966
00967
00968
00969
00970
00971
00972
00973
00974
00975
00976
00977
00978
00979
00980
00981
00982
00983
00984
00985
00986
00987
00988
00989
00990
00991
00992
00993
00994
00995
00996
00997
00998
00999 template <class A>
01000 class StringCompactor {
01001 public:
01002 typedef A Arc;
01003 typedef typename A::Label Element;
01004 typedef typename A::Label Label;
01005 typedef typename A::StateId StateId;
01006 typedef typename A::Weight Weight;
01007
01008 Element Compact(StateId s, const A &arc) const { return arc.ilabel; }
01009
01010 Arc Expand(StateId s, const Element &p, uint32 f = kArcValueFlags) const {
01011 return Arc(p, p, Weight::One(), p != kNoLabel ? s + 1 : kNoStateId);
01012 }
01013
01014 ssize_t Size() const { return 1; }
01015
01016 uint64 Properties() const {
01017 return kString | kAcceptor | kUnweighted;
01018 }
01019
01020 bool Compatible(const Fst<A> &fst) const {
01021 uint64 props = Properties();
01022 return fst.Properties(props, true) == props;
01023 }
01024
01025 static const string &Type() {
01026 static const string type = "string";
01027 return type;
01028 }
01029
01030 bool Write(ostream &strm) const { return true; }
01031
01032 static StringCompactor *Read(istream &strm) {
01033 return new StringCompactor;
01034 }
01035 };
01036
01037
01038
01039 template <class A>
01040 class WeightedStringCompactor {
01041 public:
01042 typedef A Arc;
01043 typedef typename A::Label Label;
01044 typedef typename A::StateId StateId;
01045 typedef typename A::Weight Weight;
01046 typedef pair<Label, Weight> Element;
01047
01048 Element Compact(StateId s, const A &arc) const {
01049 return make_pair(arc.ilabel, arc.weight);
01050 }
01051
01052 Arc Expand(StateId s, const Element &p, uint32 f = kArcValueFlags) const {
01053 return Arc(p.first, p.first, p.second,
01054 p.first != kNoLabel ? s + 1 : kNoStateId);
01055 }
01056
01057 ssize_t Size() const { return 1;}
01058
01059 uint64 Properties() const {
01060 return kString | kAcceptor;
01061 }
01062
01063 bool Compatible(const Fst<A> &fst) const {
01064 uint64 props = Properties();
01065 return fst.Properties(props, true) == props;
01066 }
01067
01068 static const string &Type() {
01069 static const string type = "weighted_string";
01070 return type;
01071 }
01072
01073 bool Write(ostream &strm) const { return true; }
01074
01075 static WeightedStringCompactor *Read(istream &strm) {
01076 return new WeightedStringCompactor;
01077 }
01078 };
01079
01080
01081
01082 template <class A>
01083 class UnweightedAcceptorCompactor {
01084 public:
01085 typedef A Arc;
01086 typedef typename A::Label Label;
01087 typedef typename A::StateId StateId;
01088 typedef typename A::Weight Weight;
01089 typedef pair<Label, StateId> Element;
01090
01091 Element Compact(StateId s, const A &arc) const {
01092 return make_pair(arc.ilabel, arc.nextstate);
01093 }
01094
01095 Arc Expand(StateId s, const Element &p, uint32 f = kArcValueFlags) const {
01096 return Arc(p.first, p.first, Weight::One(), p.second);
01097 }
01098
01099 ssize_t Size() const { return -1;}
01100
01101 uint64 Properties() const {
01102 return kAcceptor | kUnweighted;
01103 }
01104
01105 bool Compatible(const Fst<A> &fst) const {
01106 uint64 props = Properties();
01107 return fst.Properties(props, true) == props;
01108 }
01109
01110 static const string &Type() {
01111 static const string type = "unweighted_acceptor";
01112 return type;
01113 }
01114
01115 bool Write(ostream &strm) const { return true; }
01116
01117 static UnweightedAcceptorCompactor *Read(istream &istrm) {
01118 return new UnweightedAcceptorCompactor;
01119 }
01120 };
01121
01122
01123
01124 template <class A>
01125 class AcceptorCompactor {
01126 public:
01127 typedef A Arc;
01128 typedef typename A::Label Label;
01129 typedef typename A::StateId StateId;
01130 typedef typename A::Weight Weight;
01131 typedef pair< pair<Label, Weight>, StateId > Element;
01132
01133 Element Compact(StateId s, const A &arc) const {
01134 return make_pair(make_pair(arc.ilabel, arc.weight), arc.nextstate);
01135 }
01136
01137 Arc Expand(StateId s, const Element &p, uint32 f = kArcValueFlags) const {
01138 return Arc(p.first.first, p.first.first, p.first.second, p.second);
01139 }
01140
01141 ssize_t Size() const { return -1;}
01142
01143 uint64 Properties() const {
01144 return kAcceptor;
01145 }
01146
01147 bool Compatible(const Fst<A> &fst) const {
01148 uint64 props = Properties();
01149 return fst.Properties(props, true) == props;
01150 }
01151
01152 static const string &Type() {
01153 static const string type = "acceptor";
01154 return type;
01155 }
01156
01157 bool Write(ostream &strm) const { return true; }
01158
01159 static AcceptorCompactor *Read(istream &strm) {
01160 return new AcceptorCompactor;
01161 }
01162 };
01163
01164
01165
01166 template <class A>
01167 class UnweightedCompactor {
01168 public:
01169 typedef A Arc;
01170 typedef typename A::Label Label;
01171 typedef typename A::StateId StateId;
01172 typedef typename A::Weight Weight;
01173 typedef pair< pair<Label, Label>, StateId > Element;
01174
01175 Element Compact(StateId s, const A &arc) const {
01176 return make_pair(make_pair(arc.ilabel, arc.olabel), arc.nextstate);
01177 }
01178
01179 Arc Expand(StateId s, const Element &p, uint32 f = kArcValueFlags) const {
01180 return Arc(p.first.first, p.first.second, Weight::One(), p.second);
01181 }
01182
01183 ssize_t Size() const { return -1; }
01184
01185 uint64 Properties() const {
01186 return kUnweighted;
01187 }
01188
01189 bool Compatible(const Fst<A> &fst) const {
01190 uint64 props = Properties();
01191 return fst.Properties(props, true) == props;
01192 }
01193
01194 static const string &Type() {
01195 static const string type = "unweighted";
01196 return type;
01197 }
01198
01199 bool Write(ostream &strm) const { return true; }
01200
01201 static UnweightedCompactor *Read(istream &strm) {
01202 return new UnweightedCompactor;
01203 }
01204 };
01205
01206
01207
01208 typedef CompactFst< StdArc, StringCompactor<StdArc> >
01209 StdCompactStringFst;
01210 typedef CompactFst< StdArc, WeightedStringCompactor<StdArc> >
01211 StdCompactWeightedStringFst;
01212 typedef CompactFst<StdArc, AcceptorCompactor<StdArc> >
01213 StdCompactAcceptorFst;
01214 typedef CompactFst<StdArc, UnweightedCompactor<StdArc> >
01215 StdCompactUnweightedFst;
01216 typedef CompactFst<StdArc, UnweightedAcceptorCompactor<StdArc> >
01217 StdCompactUnweightedAcceptorFst;
01218
01219 }
01220
01221 #endif /// FST_LIB_COMPACT_FST_H__
01222