VTK
dox/TextAnalysis/vtkTokenizer.h
Go to the documentation of this file.
00001 /*=========================================================================
00002 
00003   Program:   Visualization Toolkit
00004   Module:    vtkTokenizer.h
00005 
00006   Copyright (c) Ken Martin, Will Schroeder, Bill Lorensen
00007   All rights reserved.
00008   See Copyright.txt or http://www.kitware.com/Copyright.htm for details.
00009 
00010      This software is distributed WITHOUT ANY WARRANTY; without even
00011      the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
00012      PURPOSE.  See the above copyright notice for more information.
00013 
00014 =========================================================================*/
00015 /*-------------------------------------------------------------------------
00016   Copyright 2008 Sandia Corporation.
00017   Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
00018   the U.S. Government retains certain rights in this software.
00019 -------------------------------------------------------------------------*/
00020 
00081 #ifndef __vtkTokenizer_h
00082 #define __vtkTokenizer_h
00083 
00084 #include <vtkTableAlgorithm.h>
00085 #include <vtkUnicodeString.h> //Needed for delimiter specification
00086 
// vtkTokenizer: a vtkTableAlgorithm subclass whose public interface lets
// callers add and clear two kinds of delimiter character ranges, "dropped"
// and "kept".  The delimiter state itself is held behind the opaque
// Internals object declared at the bottom of the class.
// NOTE(review): the original documentation comments are missing from this
// listing.  Presumably dropped delimiters split the input text and are
// discarded, while kept delimiters split it and are also emitted as tokens
// -- confirm against the implementation file.
00087 class VTK_TEXT_ANALYSIS_EXPORT vtkTokenizer :
00088   public vtkTableAlgorithm
00089 {
00090 public:
        // Static creation function.  The constructor is protected, so code
        // outside the class hierarchy has to obtain instances through this.
00091   static vtkTokenizer* New();
        // Hands the class name and its superclass name to VTK's type macro.
00092   vtkTypeMacro(vtkTokenizer, vtkTableAlgorithm);
        // Presumably writes a description of this object to `os` using the
        // supplied indentation -- body not visible here.
00093   void PrintSelf(ostream& os, vtkIndent indent);
00094 
        // NOTE(review): the BTX/ETX marker pair is presumably VTK's convention
        // for hiding the enclosed declarations from the language wrappers
        // (the std:: typedefs below would not be wrappable) -- confirm.
00095 //BTX
        // One delimiter range: a pair of vtkUnicodeString::value_type values.
        // Presumably (first, second) correspond to the (begin, end) arguments
        // of the Add*Delimiters() overloads; whether `end` is inclusive is
        // not visible in this header -- TODO confirm.
00098   typedef std::pair<vtkUnicodeString::value_type, vtkUnicodeString::value_type> DelimiterRange;
        // A collection of delimiter ranges.
00101   typedef std::vector<DelimiterRange> DelimiterRanges;
00102 
        // Predefined range sets, each returned by value as a const
        // DelimiterRanges.  Judging by the names they presumably cover
        // Unicode punctuation, whitespace and logosyllabic-script characters
        // respectively -- the actual tables live in the implementation.
00105   static const DelimiterRanges Punctuation();
00108   static const DelimiterRanges Whitespace();
00112   static const DelimiterRanges Logosyllabic();
00113 
        // Registers "dropped" delimiters, either as a single begin/end range
        // or as a whole collection of ranges.
00116   void AddDroppedDelimiters(vtkUnicodeString::value_type begin, vtkUnicodeString::value_type end);
00119   void AddDroppedDelimiters(const DelimiterRanges& ranges);
00120 
        // Registers "kept" delimiters; same two overload shapes as
        // AddDroppedDelimiters().
00123   void AddKeptDelimiters(vtkUnicodeString::value_type begin, vtkUnicodeString::value_type end);
00125 
00127   void AddKeptDelimiters(const DelimiterRanges& ranges);
00128 //ETX
00130 
00132 
        // Argument-free helpers declared outside the BTX/ETX section.
        // Presumably each forwards the matching predefined range set
        // (Punctuation(), Whitespace() or Logosyllabic()) to
        // AddDroppedDelimiters() / AddKeptDelimiters() -- confirm in the
        // implementation.  Note that no DropLogosyllabic() is declared.
00136   void DropPunctuation();
00137   void DropWhitespace();
00138   void KeepPunctuation();
00139   void KeepWhitespace();
00140   void KeepLogosyllabic();
00142 
        // Presumably discards every dropped-delimiter range added so far.
00144   void ClearDroppedDelimiters();
        // Presumably discards every kept-delimiter range added so far.
00146   void ClearKeptDelimiters();
00147 
00148 //BTX
00149 protected:
        // Construction and destruction are restricted to the class hierarchy;
        // see New() for public creation.
00150   vtkTokenizer();
00151   ~vtkTokenizer();
00152 
        // Pipeline hook taking an input port index and an information object.
        // NOTE(review): presumably overrides the vtkAlgorithm virtual of the
        // same name to describe the data accepted on each input port.
00153   int FillInputPortInformation(int port, vtkInformation* info);
00154 
        // Pipeline hook receiving the request plus the input and output
        // information vectors; declared virtual here.
        // NOTE(review): presumably where tokenization is performed and the
        // output table is filled -- the body is not visible in this header.
00155   virtual int RequestData(
00156     vtkInformation* request,
00157     vtkInformationVector** inputVector,
00158     vtkInformationVector* outputVector);
00159 
00160 private:
        // Copy construction and assignment are declared private and left
        // without a definition, so instances cannot be copied.
00161   vtkTokenizer(const vtkTokenizer &); // Not implemented.
00162   void operator=(const vtkTokenizer &); // Not implemented.
00163 
        // Forward-declared nested class, defined outside this header, that
        // keeps implementation details out of the public interface.
00164   class Internals;
        // Const pointer: it must be set in the constructor's initializer list
        // and cannot be reseated afterwards.
00165   Internals* const Implementation;
00166 //ETX
00167 };
00168 
00169 #endif // __vtkTokenizer_h
00170