00001 // Copyright (C) 2006 Bryan Jacobson <bryanjacobson@users.sourceforge.net> 00002 // This program is licensed under the terms of the GNU General Public License 00003 // version 2, as published by the Free Software Foundation. 00004 // This program is distributed in the hope that it will be useful, 00005 // but WITHOUT ANY WARRANTY; without even the implied warranty of 00006 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 00007 00011 00012 #ifndef DICTIONARY_H 00013 #define DICTIONARY_H 00014 00016 class Word 00017 { 00018 public: 00020 Word(Id word, Id gtype, Word *pCompound, Data *p_data = NULL, 00021 Word *pNext = NULL); 00022 00024 void Link(Word *word); 00025 00027 void Print(); 00028 00030 Id GetWord() { return mWord; }; 00031 00033 Id GetGramType() { return mGtype; }; 00034 00036 Data *GetData() { return mpData; }; 00037 00039 Word *GetCompound() { return mpComp; }; 00040 00042 Word *Next() { return mpNext; }; 00043 00045 int GetFrequency() { return mFreq; } 00046 private: 00047 Id mWord; // Id of the word itself, as in "the". 00048 Id mGtype; // Id of the gType of the word, example: "inst". 00049 Data *mpData; // Word data attributes (if any) 00050 Word *mpNext; // Pointer to next entry for this word. 00051 Word *mpComp; // For words of type "North Dakota", the main entry will 00052 // be for "Dakota" but it will have a compound link to 00053 // a special entry with mWord="North". 00054 int mFreq; // Frequency this word occurs. Used by spelling correction. 00055 }; 00056 00058 class Dictionary 00059 { 00060 public: 00062 static void ReadWordFile(char *word_file); 00063 00065 static Word *Lookup(Id wordId); 00066 00068 static void AddWord(Id wordId, Id gramId); 00069 00071 static Id Token(char *inWord); 00072 00074 static void Dump(char *fileName); 00075 }; 00076 00077 #endif // DICTIONARY_H