Edinburgh Speech Tools 2.4-release
 
Loading...
Searching...
No Matches
dp_main.cc
1/*************************************************************************/
2/* */
3/* Centre for Speech Technology Research */
4/* University of Edinburgh, UK */
5/* Copyright (c) 1995,1996 */
6/* All Rights Reserved. */
7/* */
8/* Permission is hereby granted, free of charge, to use and distribute */
9/* this software and its documentation without restriction, including */
10/* without limitation the rights to use, copy, modify, merge, publish, */
11/* distribute, sublicense, and/or sell copies of this work, and to */
12/* permit persons to whom this work is furnished to do so, subject to */
13/* the following conditions: */
14/* 1. The code must retain the above copyright notice, this list of */
15/* conditions and the following disclaimer. */
16/* 2. Any modifications must be clearly marked as such. */
17/* 3. Original authors' names are not deleted. */
18/* 4. The authors' names are not used to endorse or promote products */
19/* derived from this software without specific prior written */
20/* permission. */
21/* */
22/* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23/* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24/* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25/* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26/* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27/* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28/* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29/* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30/* THIS SOFTWARE. */
31/* */
32/*************************************************************************/
33/* Author: Simon King */
34/* Date : May 1998 */
35/*-----------------------------------------------------------------------*/
36/* Simple dynamic programming */
37/* e.g. for aligning pronunciations */
38/*=======================================================================*/
39
40#include <cstdlib>
41#include <cstdio>
42#include <cmath>
43#include "EST.h"
44
45EST_read_status load_TList_of_StrVector(EST_TList<EST_StrVector> &w,
46 const EST_String &filename,
47 const int vec_len);
48
49typedef
50float (*local_cost_function)(const EST_Item *item1,
51 const EST_Item *item2);
52
53typedef
54bool (*local_pruning_function)(const int i,
55 const int j,
56 const int max_i,
57 const int max_j);
58
59bool dp_match(const EST_Relation &lexical,
60 const EST_Relation &surface,
62 local_cost_function lcf,
63 local_pruning_function lpf,
64 EST_Item *null_sym);
65
66bool dp_match(const EST_Relation &lexical,
67 const EST_Relation &surface,
69 local_cost_function lcf,
70 EST_Item *null_sym);
71
72float local_cost(const EST_Item *s1, const EST_Item *s2);
73bool local_prune(const int i, const int j,
74 const int max_i,const int max_j);
75static void load_vocab(const EST_String &vfile);
76static EST_Item *null_sym;
77//static EST_String deleted_marker = "<del>";
78//static EST_String inserted_marker = "<ins>";
79static bool show_cost=FALSE;
80static int prune_width = 100;
81//static float path_cost;
82int StrVector_index(const EST_StrVector &v, const EST_String &s);
83
84EST_StrList pattern1_l, pattern2_l, vocab_l;
85EST_StrVector pattern1, pattern2, vocab;
86
87EST_FMatrix DP_substitution_cost;
88EST_FVector DP_deletion_cost;
89EST_FVector DP_insertion_cost;
90EST_IMatrix DP_path_i,DP_path_j;
91EST_FMatrix cost_matrix;
92
93// this are local distance measures,
94// NOT the 1/2/1 weights in the DP recursion formula !
95float insertion_cost = 1;
96float deletion_cost = 1;
97float substitution_cost = 1;
98
99
100// two possibilities :
101// 1. simple insertion/deletion/substitution penalties
102// 2. matrix of distances between all pairs of vocab items,
103// where vocab includes some null symbol for insertions/deletions
104
105EST_String distance_measure = "simple"; // could be "matrix"
106
107
108
109
110/** @name <command>dp</command> <emphasis> Perform dynamic programming on label sequences</emphasis>
111 * @id dp-manual
112 * @toc
113 */
114
115//@{
116
117/**@name Synopsis
118 */
119//@{
120
121//@synopsis
122
123/**
124dp provides simple dynamic programming to find the lowest cost
125alignment of two symbol sequences. Possible uses include:
126
127<itemizedlist>
128<listitem><para>Label alignment (e.g. speech recogniser output scoring) </para></listitem>
129</itemizedlist>
130
131The costs of inserting/deleting/substituting symbols can be given
132either with command line options, or as a file containing a full
133matrix of costs. In the former case, all insertions will have the same
134cost. In the latter case, each row of the file contains the cost of
135replacing a symbol in sequence 1 with each possible symbol in the
136vocabulary to align with sequence 2, including a special "place
137holder" symbol used for insertions/deletions. See the examples
138below. The place holder can be redefined.
139
140The output is an EST utterance with three Relations: the first two are
141the input sequences, and the third shows the alignment found by
142dynamic programming.
143
144*/
145
146//@}
147
148/**@name Options
149 */
150//@{
151
152//@options
153
154//@}
155
156
157int main(int argc, char **argv)
158{
161 EST_String out_file;
162 //int i;
164
165 null_sym = new EST_Item;
166 null_sym->set_name("<null>");
167
168 parse_command_line(argc, argv,
169 EST_String("Usage:\n")+
170 "dp <options> \"pattern 1\" \"pattern 2\"\n"+
171 "Find the best alignment of a pair of symbol sequences (e.g. word pronuciations).\n"+
172 "-vocab <string> file containing vocabulary\n"+
173 "-place_holder <string> which vocab item is the place holder (default is " + null_sym->name() + " )\n"+
174 "-show_cost show cost of matching path\n"+
175 "-o <string> output file\n"+
176 "-p <int> 'beam' width\n"+
177 "Either:\n"+
178 "-i <float> insertion cost\n"+
179 "-d <float> deletion cost\n"+
180 "-s <float> substitution cost\n"+
181 "Or:\n"+
182 "-cost_matrix <string> file containing cost matrix\n",
183 files, al);
184
185 out_file = al.present("-o") ? al.val("-o") : (EST_String)"-";
186
187 if (al.present("-vocab"))
188 load_vocab(al.val("-vocab"));
189 else
190 {
191 cerr << argv[0] << ": no vocab file specified" << endl;
192 exit(-1);
193 }
194
195 if (al.present("-p"))
196 prune_width = al.ival("-p");
197
198 if (al.present("-cost_matrix"))
199 {
200 if(al.present("-i") || al.present("-d") || al.present("-s") )
201 {
202 cerr << "Can't have ins/del/subs costs as well as matrix !" << endl;
203 exit(-1);
204 }
205 distance_measure="matrix";
206 cost_matrix.load(al.val("-cost_matrix"));
207
208 if(al.present("-place_holder"))
209 null_sym->set_name(al.val("-place_holder"));
210
211 if(StrVector_index(vocab,null_sym->name()) < 0)
212 {
213 cerr << "The place holder symbol '" << null_sym->name();
214 cerr << "' is not in the vocbulary !" << endl;
215 exit(-1);
216 }
217
218 if(cost_matrix.num_columns() != vocab.length())
219 {
220 cerr << "Cost matrix number of columns must match vocabulary size !" << endl;
221 exit(-1);
222 }
223 if(cost_matrix.num_rows() != vocab.length())
224 {
225 cerr << "Cost matrix number of rows must match vocabulary size !" << endl;
226 exit(-1);
227 }
228
229 }
230 else if(al.present("-i") && al.present("-d") && al.present("-s") )
231 {
232 insertion_cost = al.fval("-i");
233 deletion_cost = al.fval("-d");
234 substitution_cost = al.fval("-s");
235 }
236 else
237 {
238 cerr << "Must give either ins/del/subs costs or cost matrix !" << endl;
239 exit(-1);
240 }
241
242
243 if(al.present("-show_cost"))
244 show_cost=TRUE;
245
246 if(files.length() != 2)
247 {
248 cerr << "Must give 2 patterns !" << endl;
249 exit(-1);
250 }
251
252 StringtoStrList(files(files.head()),pattern1_l," ");
253 StringtoStrList(files(files.head()->next()),pattern2_l," ");
254
255 EST_Utterance utt;
256 path1 = utt.create_relation("Lexical");
257 path2 = utt.create_relation("Surface");
258 match = utt.create_relation("Match");
259
260 EST_Litem *p;
261 for(p=pattern1_l.head();p != 0; p=p->next())
262 {
263 if( StrVector_index(vocab,pattern1_l(p)) < 0)
264 {
265 cerr << pattern1_l(p) << " is not in the vocabulary !" << endl;
266 exit(1);
267 }
269 new_item.set_name(pattern1_l(p));
270 path1->append(&new_item);
271 }
272
273 for(p=pattern2_l.head();p != 0; p=p->next())
274 {
275 if( StrVector_index(vocab,pattern2_l(p)) < 0)
276 {
277 cerr << pattern2_l(p) << " is not in the vocabulary !" << endl;
278 exit(1);
279 }
281 new_item.set_name(pattern2_l(p));
282 path2->append(&new_item);
283 }
284
285 // to do : check all items in two patterns are in vocab
286 // .....
287
288
289// cerr << "MATCHING..." << endl;
290 if(!dp_match(*path1,*path2,*match,
291 local_cost,local_prune,null_sym))
292// cerr << "OK !" << endl;
293// else
294 cerr << "No match could be found." << endl;
295
296
297 utt.save(out_file);
298
299
300 return 0;
301}
302
303
304static void load_vocab(const EST_String &vfile)
305{
306 // Load vocabulary (strings)
308
309 if (ts.open(vfile) == -1)
310 {
311 cerr << "can't find vocab file \"" << vfile << "\"" << endl;
312 exit(-1);
313 }
314
315 while (!ts.eof())
316 if (ts.peek() != "")
317 vocab_l.append(ts.get().string());
318
319 ts.close();
320
321 StrList_to_StrVector(vocab_l,vocab);
322}
323
324
325float local_cost(const EST_Item *s1, const EST_Item *s2)
326{
327 // N.B. cost is not necessarily zero for matching symbols
328
329 //cerr << "lcf " << s1 << "," << s2 << endl;
330
331 // otherwise cost is either insertion cost, or cost_matrix value
332 if(distance_measure == "simple")
333 {
334 if(s1->name() == s2->name())
335 return 0;
336 else
337 {
338 if(s1 == null_sym)
339 return insertion_cost;
340 else if(s2 == null_sym)
341 return deletion_cost;
342 else
343 return substitution_cost;
344 }
345 }
346
347 //cerr << "Cost of replacing " << s1 << " with " << s2 << " is ";
348 //cerr << cost_matrix(StrVector_index(vocab,s1),StrVector_index(vocab,s2)) << endl;
349
350 return cost_matrix(StrVector_index(vocab,s1->name()),
351 StrVector_index(vocab,s2->name()));
352
353}
354
355bool local_prune(const int i, const int j,
356 const int max_i, const int max_j)
357{
358
359 // keep it simple :
360 // if we stray too far from the diagonal - PRUNE !
361
362 float scale = (float)max_i / (float)max_j;
363
364 float near_j = (float)i / scale;
365 float near_i = (float)j * scale;
366
367 /*
368 cerr << "prune i: " << i << " " << near_i - (float)i
369 << " " << abs(near_i - (float)i)<< endl;
370 cerr << "prune j: " << j << " " << near_j - (float)j
371 << " " << abs(near_j - (float)j) << endl;
372 */
373
374 if( (abs((int)(near_i - (float)i)) > prune_width) ||
375 (abs((int)(near_j - (float)j)) > prune_width) )
376 {
377 //cerr << "lpf " << i << "," << j << " true" << endl;
378 return TRUE;
379 }
380
381 return FALSE;
382
383}
384
385
386/**@name Examples
387
388<para>
389Align two symbol sequences:
390</para>
391
392<para>
393<screen>
394$ dp -vocab vocab.file "a b c" "a b d c" -i 1 -d 2 -s 3
395</screen>
396</para>
397
398<para>
399where vocab.file contains "a b c d"
400</para>
401
402<para>
403Or, using a full cost matrix:
404</para>
405
406<para>
407<screen>
408$ dp -vocab vocab2.file -cost_matrix foo "a b c" "a b d c"
409</screen>
410</para>
411
412<para>
413where vocab2.file contains "a b c d <null>" and the file foo contains:
414</para>
415
416<para>
417<screen>
418<para>0 3 3 3 2</para>
419<para>3 0 3 3 2</para>
420<para>3 3 0 3 2</para>
421<para>3 3 3 0 2</para>
422<para>1 1 1 1 0</para>
423</screen>
424</para>
425
426<para> Each row of foo shows the cost of replacing an input symbol
427with each symbol in the vocabulary to match an output symbol. Each row
428corresponds to an item in the vocabulary (in the order they appear in
429the vocabulary file). In the example, replacing 'a' with 'a' costs 0,
430replacing 'a' with any of 'b' 'c' or 'd' costs 3 (a substitution), and
431replacing 'a' with the place holder symbol 'null' costs 2 (a
432deletion). The cost of replacing 'null' with anything other than
433'null' costs 1 (an insertion). The costs of 1,2 and 3 used here are
434only for illustration. The cost matrix meed not have the form above -
435for example, replacing 'a' with 'a' need not cost 0. The entries in
436foo are read as floats. </para>
437
438*/
439//@{
440//@}
EST_read_status load(const EST_String &filename)
Load from file (ascii or binary as defined in file)
void append(const T &item)
add item onto end of list
Definition EST_TList.h:191
int num_columns() const
return number of columns
int num_rows() const
return number of rows
INLINE int length() const
number of items in vector.
EST_write_status save(const EST_String &filename, const EST_String &type="est_ascii") const
EST_Relation * create_relation(const EST_String &relname)
create a new relation called <parameter>n</parameter>.