// LineBrk.cpp

// Line break sample implementation using pair tables

// Set WINDOWS_UI to 0 to get commandline UI	 // compiles, but has not yet been tested
// Set WINDOWS_UI to 1 for generic win 32 dialog // not tested
// Set WINDOWS_UI to 2 for private build		 // tested extensively

#ifndef WINDOWS_UI
#define WINDOWS_UI 0
#endif

// Sample dialogs work better if compiled for Unicode
#if WINDOWS_UI
#ifndef _UNICODE
#define _UNICODE
#endif
#ifndef UNICODE
#define UNICODE
#endif
#endif

// Linebreak include file
#ifndef _LINEBRK_H_
#include "linebrk.h"
#endif

// Set to 0 for default
// Set to 1 to run in debug mode
// Enable this line for debugging, or set via makefile
// #define DEBUGGING 1				

#ifndef DEBUGGING
#define DEBUGGING 0
#else
#ifndef DEMO
#define DEMO 1
#endif
#endif
// Debug mode enables Demo mode
// Debug mode enables Table checks


// Set to 1 if demo operation is 
// desired outside debug mode
#ifndef DEMO
#define DEMO 0
#endif

#if DEBUGGING
// for Table verification, enable this line
// #define VERIFY_PAIR_TABLE  
#ifdef VERIFY_PAIR_TABLE
// change as needed for table verification
#define VERIFICATION_FILE L"PairTableFull.html"
#pragma message("Table assertions enabled")
#endif
#endif

/*---------------------------------------------------------------------------
	 File: LineBrk.Cpp

	 This is sample code for the line breaking algorithm of
	 Unicode Standard Annex #14, Line Breaking Properties, Version 4.1.0

	 Conformance
	 -----------

	 This sample uses a pseudo-alphabet for ease of testing. To make the 
	 code work for regular Unicode, replace the function classifyLnBrk() with 
	 one that looks up the line break classes for Unicode characters from 
	 the file LineBreak.txt in the Unicode Character Database.

	 While every effort has been made to conform to the specifications
	 in UAX#14, no formal testing or verification has been carried out,
	 other than ensuring that the values in the pair table match those
	 in the HTML text of UAX#14.

     Build Notes
	 -----------

	 To compile the sample implementation please set the #define 
	 directives above so the correct headers get included. 
	 
	 The Win32 version is provided as a dialog procedure. To create 
	 a full executable using VC++ set up a Win32 project and add all
	 the files to it. Add #define WINDOWS_UI 1 at the top of each file
	 or set /DWINDOWS_UI=1 on the compiler commandline.

	 To compile a standalone commandline version, use just the two
	 files linebrk.cpp and linebrk.h.

	 This code relies on a pre-standard C++ extension under which a variable
	 declared in the header of a for() statement stays in scope after the
	 loop has ended. If your compiler does not support this extension, you may
	 need to move the declaration, e.g. int ich = 0; in front of the for statement.

	 Notation
	 --------
	 Pointer variables generally start with the letter p
	 Counter variables generally start with the letter c
	 Index variables generally start with the letter i
	 Boolean variables generally start with the letter f

	 The enumerated line break classes have the same name as in the
	 description for the Unicode Line Breaking Property

	 Update History:
	 --------------
	 Last Revised 05-03-30

	 Updated the pair table, improved handling of CM.
	 Sample can now produce HTML pair table for verification.
	 Changed to match Unicode Version 4.1
	 
	 Last Revised 04-06-03

	 Updated the pair table, improved handling of CM.
	 Removed commented out code. Added new classes NL and WJ.

	 Last Revised 23-08-02

	 Expanded sample to handle all classes, including BK, CR, LF and SG
	 Fixed the case of space at beginning of the line. Revised the
	 break pair table to match revised rules in Version 4.0.0 of UAX#14.
	 
	 Last Revised 03-08-01

	 Fixed regression in findLineBreak that made all characters
	 behave like combining marks when CMInTable was deselected.

	 Last Revised 04-25-01

	 Credits:
	 -------
	 Written by: Asmus Freytag
	 

     Disclaimer and legal rights:
     ---------------------------
     Copyright (C) 1999-2005, ASMUS, Inc. All Rights Reserved. 
     Distributed under the Terms of Use in http://www.unicode.org/copyright.html.
 
     THIS SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 
     OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
     FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. 
     IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE 
     BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, 
     OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, 
     WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, 
     ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THE SOFTWARE.

     The files linebrk.h, linebrk.rc, and resource.h are distributed together 
	 with this file and are included in the above.
----------------------------------------------------------------------------------*/

// === LOCAL FUNCTION DECLARATIONS ==========================================
int classifyLnBrk(const LPTSTR pszText, enum break_class * pcls,  int cch);
int findLineBrk(enum break_class *pcls, enum break_action *pbrk, int cch, bool fTailorCMSP = false);
int findComplexBreak(enum break_class cls, enum break_class *pcls, enum break_action *pbrk, int cch);

void verifyTable();
enum break_class LBClassFromCh(TCHAR ch);

// === HELPER FUNCTIONS ==========================================================

// a stub to bypass assertions
// Stand-in for an assertion facility: it simply hands the tested condition
// back to the caller and never aborts or reports anything.
bool assert(bool x)
{
	return x;
}

// === DEMO DISPLAY FUNCTIONS AND DECLARATIONS ====================================

// The demo code uses a pseudo classification which maps the ASCII character set to various line break
// classes. For a real implementation, use the values in LineBreak.txt in the Unicode Character Database.
// The sample mapping is found further below together with the classification function.
//
// This section of the file contains additional mappings from various values that are used to make special 
// characters visible, sets of arrays that allow a mapping to the short name, and a help string for the demo.

// mapping of special characters to control codes for the pseudo alphabet
// Control codes 1..13 of the pseudo alphabet. Each code stands for one
// special character; chFIRST and chLAST delimit the range and must stay in
// step with the number of entries in chVisibleFromSpecial.
// (The type is spelled out: "implicit int" is not valid C++.)
const int chFIRST = 1;		// lowest special code
const int chZWSP = 1;		// zero width space
const int chZWNBSP = 2;		// zero width no-break space
const int chNBHY = 3;		// non-breaking hyphen
const int chSHY = 4;		// soft hyphen
const int chNBSP = 5;		// no-break space
const int chSSP = 6;		// Soft Space
const int chEM = 7;			// Em dash
const int chELLIPSIS = 8;	// Ellipsis
const int chTB =  9;		// tab
const int chLFx = 10;		// line feed
const int chOBJ = 11;		// object
const int chDummy = 12;		// unused placeholder
const int chCRx = 13;		// carriage return
const int chLAST = 13;		// highest special code

// characters in the above list are mapped *both* ways
// don't use regular ASCII characters

// mapping of special character codes to Unicode symbols for visualization
// Display symbol for each special control code. The codes are one-based,
// so the symbol for code n is stored at index n - 1.
int chVisibleFromSpecial[] =
{
	0x2020,		//  1 chZWSP     -> U+2020 dagger
	0x2021,		//  2 chZWNBSP   -> U+2021 double dagger
	0x00AC,		//  3 chNBHY     -> U+00AC not sign
	0x00B7,		//  4 chSHY      -> U+00B7 middle dot
	0x2017,		//  5 chNBSP     -> U+2017 double low line
	0x203E,		//  6 chSSP      -> U+203E overline
	0x2014,		//  7 chEM       -> U+2014 em dash
	0x2026,		//  8 chELLIPSIS -> U+2026 horizontal ellipsis
	0x2310,		//  9 chTB       -> U+2310 reversed not sign
	0x2580,		// 10 chLFx      -> U+2580 upper half block
	0x2302,		// 11 chOBJ      -> U+2302 house
	0x2222,		// 12 chDummy    -> U+2222 spherical angle
	0x2584,		// 13 chCRx      -> U+2584 lower half block
};

// map character codes to visible symbol
// Translate a character code into the symbol used to display it.
// Codes in the special range chFIRST..chLAST are replaced by their entry in
// chVisibleFromSpecial; every other code is returned unchanged.
int VisibleFromChar(int ch)
{
	const bool fSpecial = (ch >= chFIRST) && (ch <= chLAST);

	// the special codes are a one-based enumeration, the table is zero-based
	return fSpecial ? chVisibleFromSpecial[ch - 1] : ch;
}

// map visible symbol to character
// Inverse of VisibleFromChar: if ch is one of the display symbols listed in
// chVisibleFromSpecial, return the (one-based) special code it stands for;
// otherwise return ch unchanged.
int CharFromVisible(int ch)
{
	const int cSpecial = sizeof chVisibleFromSpecial / sizeof chVisibleFromSpecial[0];

	// linear search for the first matching symbol
	int iSpecial = 0;
	while (iSpecial < cSpecial && chVisibleFromSpecial[iSpecial] != ch)
	{
		iSpecial++;
	}

	return (iSpecial < cSpecial) ? iSpecial + 1 : ch;
}

// help string for the Windows UI, showing sample characters
#if WINDOWS_UI
// Help text listing the pseudo-alphabet; the dialog code in this file puts it
// into the IDC_EXPLAIN control. Adjacent TEXT() literals are concatenated by
// the compiler into a single string.
// NOTE(review): the doubled ampersand in "Break Before: &&" is presumably an
// escape so that the control shows a single '&' -- confirm against the
// control style used in the resource file.
TCHAR * explain =
TEXT("This sample uses the following pseudo-alphabet as input\r\n")
TEXT("Alphabetic:  a-f   Ideograph:   A-F   Numeric:    0-9 \r\n")
TEXT("Combining:    `    Hangul 2:     h    Hangul 3:    H  \r\n")
TEXT("Jamo Lead:    L    Jamo Vowel:   V    Jamo Trail:  T  \r\n")
TEXT("Prefix:       $    Postfix:      %    Separator:   ,  \r\n")
TEXT("Exclamation:  !?   Non-Starter:  :    Syntax:      /  \r\n")
TEXT("Break after:  *    Break Before: &&    Hyphen:      -  \r\n")
TEXT("Quote:        \"    Glue:         G    Word Joiner: W  \r\n")
TEXT("Open         {[(   Close:       )]}   Leaders:     _  \r\n") 
TEXT("ZW-Space:     Z    Complex:      Y    Object:      @  \r\n") 
TEXT("Space:       ' '   Break opportunities are shown as | or \xA6");
#endif

// representative reverse mapping of the above
// One sample character per line break class, in the order given in the
// comment below (23 entries). GetInputText() draws its generated input
// strings from this array.
// NOTE(review): several entries do not round-trip through LnBrkClassFromChar
// as it stands: 0x6a ('j') and 0x77 ('w') are classified AL there (CM is '`'
// and WJ is 'W'), 0x2c (',') is classified IN and 0x5f ('_') IS. Confirm
// which side is intended before relying on this table.
TCHAR CharFromLnbkTypes[] =
{
	// OP,	CL,  QU,  GL,  NS,  EX,  SY,  IS,  PR,  PO,  NU,  AL,  ID,  IN,  HY,  BA,  BB,  B2,  ZW,  CM,  WJ,  SA,  SP,[ PS,  BK,  CR,  LF,  NL, CB, SG] = class 
	 0x28,0x29,0x27,0x3D,0x3a,0x21,0x2f,0x2c,0x24,0x25,0x30,0x61,0x4A,0x5f,0x2d,0x2a,0x26,0x07,0x01,0x6a,0x77, 0x7f,0x20,
    //  (    )    '   =    :    !    /     ,    $   %    0     a    J    _   -    *    &   bell ^A    j    w   DEL  ' '
};


// map Lbcls into single letter sequence 1-9,A...Y" for times when it is 
// desired to show a string of linebreak classes that has the same length
// as the input string in characters
// Single-character label for each line break class from OP through CB,
// indexed by class value: '1'..'9' for the first nine classes, then
// 'A'..'Y' for the remaining twenty-five.
int CharFromLbcls[] =
{
	'1', '2', '3', '4', '5', '6', '7', '8', '9',	// OP CL QU GL NS EX SY IS PR
	'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I',	// PO NU AL ID IN HY BA BB B2
	'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q',			// ZW CM WJ H2 H3 JL JV JT
	'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y',			// SA SP PS BK CR LF NL CB
};

// Line break classes are shown vertically in the UI so that each class
// fits underneath the current character. The first array gives the top 
// row, the second the bottom row.

// First letter of the two-letter name of each line break class from OP
// through CB, indexed by class value (top row of the vertical display).
int CharFromLbcls1[] =
{
	'O', 'C', 'Q', 'G', 'N', 'E', 'S', 'I', 'P',	// OP CL QU GL NS EX SY IS PR
	'P', 'N', 'A', 'I', 'I', 'H', 'B', 'B', 'B',	// PO NU AL ID IN HY BA BB B2
	'Z', 'C', 'W', 'H', 'H', 'J', 'J', 'J',			// ZW CM WJ H2 H3 JL JV JT
	'S', 'S', 'P', 'B', 'C', 'L', 'N', 'C',			// SA SP PS BK CR LF NL CB
};
// Second letter of the two-letter name of each line break class from OP
// through CB, indexed by class value (bottom row of the vertical display).
int CharFromLbcls2[] =
{
	'P', 'L', 'U', 'L', 'S', 'X', 'Y', 'S', 'R',	// OP CL QU GL NS EX SY IS PR
	'O', 'U', 'L', 'D', 'N', 'Y', 'A', 'B', '2',	// PO NU AL ID IN HY BA BB B2
	'W', 'M', 'J', '2', '3', 'L', 'V', 'T',			// ZW CM WJ H2 H3 JL JV JT
	'A', 'P', 'S', 'K', 'R', 'F', 'L', 'B',			// SA SP PS BK CR LF NL CB
};

// These are also needed in the UT portion of the code so they are already defined here
// Break actions: the values held in the pair table and written to the
// output array of findLineBrk. The quoted symbols are the ones used for
// these actions in the comments on the pair-table shortcuts.
enum break_action
{
	DIRECT_BRK,					// '_' break allowed
	INDIRECT_BRK, 				// '%' break allowed only across space
	COMBINING_INDIRECT_BRK, 	// '#' indirect break for combining marks
	COMBINING_PROHIBITED_BRK, 	// '@' prohibited break for combining marks
	PROHIBITED_BRK,				// '^' no break allowed
	EXPLICIT_BRK,				// '!' break required
	HANGUL_SPACE_BRK,			// break allowed, except when spaces are used with Hangul
};

//=== DEMO DIALOG AND HELPER FUNCTIONS==============================================

#define MAX_CCH 256
// Produce the next generated input string of cch characters.
//
// A static array of per-position counters is advanced like an odometer on
// every call (position 0 is the fastest digit; a position that wraps around
// carries into the next one). Each counter selects one sample character from
// CharFromLnbkTypes.
//
// pszInput - receives the string; must have room for cch + 1 characters
// cch      - requested length; clamped to the range 0..MAX_CCH because the
//            counter array has MAX_CCH entries
//
// Returns the number of characters written, not counting the terminator.
int GetInputText(TCHAR * pszInput, int cch)
{
	static int ich[MAX_CCH]; 

	const int max_ich = sizeof CharFromLnbkTypes / sizeof CharFromLnbkTypes[0];

	// never index past the counter array
	if (cch < 0)
		cch = 0;
	if (cch > MAX_CCH)
		cch = MAX_CCH;

	// declared outside the loops: standard C++ ends the scope of a variable
	// declared in a for-statement at the end of that statement
	int i;

	// advance the odometer by one step
	for (i = 0; i < cch; i++)
	{
		if (++ich[i] >= max_ich)
		{
			ich[i] = 0;		// wrapped: carry into the next position
			continue;
		}
		break;
	}

	// translate the counters into characters
	for (i = 0; i < cch; i++)
	{
		pszInput[i] = CharFromLnbkTypes[ich[i]];
	}
	pszInput[i] = 0;
	
	return i;
}

// === DISPLAY OPTIONS ======================================================


#if WINDOWS_UI > 0
#pragma message("Compiling linebrk.cpp for Windows UI")

// === LINEBRK MAIN FUNCTION ===================================================

// Show the line break classes of the input in dialog item idc as two rows
// (first letter of each class name above, second letter below), so that each
// two-letter class name reads vertically underneath its character.
//
// hwndDlg - dialog that owns the control
// idc     - id of the control receiving the text
// lbcls   - line break class of each input character
// cch     - number of entries in lbcls (at most MAX_CCH are shown)
void ShowLBClasses(HWND hwndDlg, int idc, enum break_class *lbcls, int cch)
{
	// two rows of cch characters + "\r\n" + terminator
	TCHAR pszTypes[MAX_CCH * 2 + 3];

	const int cTop = sizeof CharFromLbcls1 / sizeof CharFromLbcls1[0];
	const int cBottom = sizeof CharFromLbcls2 / sizeof CharFromLbcls2[0];

	if (cch < 0)
		cch = 0;
	if (cch > MAX_CCH)
		cch = MAX_CCH;

	int ichOut = 0;
	int ich;

	// top row; classes without a label are shown as '?'
	for (ich = 0; ich < cch; ich++)
	{
		const int cls = lbcls[ich];
		pszTypes[ichOut++] = (cls >= 0 && cls < cTop) ? (TCHAR) CharFromLbcls1[cls] : (TCHAR) '?';
	}
	pszTypes[ichOut++] = '\r';
	pszTypes[ichOut++] = '\n';

	// bottom row
	for (ich = 0; ich < cch; ich++)
	{
		const int cls = lbcls[ich];
		pszTypes[ichOut++] = (cls >= 0 && cls < cBottom) ? (TCHAR) CharFromLbcls2[cls] : (TCHAR) '?';
	}
	pszTypes[ichOut] = 0;
	SetDlgItemText(hwndDlg, idc, pszTypes);
}

// Write the input text to dialog item idc with a marker after every
// character that is followed by a break opportunity.
//
// hwndDlg  - dialog that owns the control
// idc      - id of the control receiving the text
// pszInput - the input characters
// pbrk     - break action following each input character
// cch      - number of characters (at most MAX_CCH are shown)
void ShowLineBreaks(HWND hwndDlg, int idc, LPTSTR pszInput, enum break_action *pbrk, int cch)
{
	// worst case is three output characters per input character (the
	// non-UNICODE explicit break marker) plus the terminator
	TCHAR pszBrkText[3 * MAX_CCH + 1];

	if (cch > MAX_CCH)
		cch = MAX_CCH;

	// declared outside the loop because it is needed after the loop
	int ichOut = 0;
	for (int ichIn = 0; ichIn < cch; ichIn++)
	{
		// echo input character
		pszBrkText[ichOut++] = pszInput[ichIn];
		
		// echo break opportunity
		switch (pbrk[ichIn])
		{
		case EXPLICIT_BRK:					// '!' break required
#ifdef UNICODE
			pszBrkText[ichOut++] = 0x2551;	// double vertical line
#else
			pszBrkText[ichOut++] = '|';		// double vertical line
			pszBrkText[ichOut++] = '|';		// double vertical line
#endif
			break;
		case DIRECT_BRK:					// '_' break allowed
			pszBrkText[ichOut++] = '|';
			break;
		default:
		case INDIRECT_BRK:					// '%' only break across space (aka 'indirect break' below)
			pszBrkText[ichOut++] = (TCHAR) 0xa6;
			break;
		case COMBINING_INDIRECT_BRK:		// '#' indirect break for combining marks
			pszBrkText[ichOut++] = (TCHAR) 0xa6;
			break;
		case COMBINING_PROHIBITED_BRK:		// '@' prohibited break for combining marks
			/* fall through */
		case PROHIBITED_BRK:				// '^' no break allowed
			break;
		case HANGUL_SPACE_BRK:				// break allowed, except when spaces are used with Hangul
			pszBrkText[ichOut++] = (TCHAR) 0xa6; // not yet used
			break;
		}
	}
	pszBrkText[ichOut] = 0;
	SetDlgItemText(hwndDlg, idc, pszBrkText);
}

/*---------------------------------------------------------------------------
	Function: LineBrk
	
	Implements 

	Input: Handle to dialog

	Note: directly reads/writes to fields int the dialog, limit 256 chars
----------------------------------------------------------------------------*/

// Run the line break algorithm on the text of the IDC_INPUT field and show
// the results: the line break classes go to IDC_TYPES, the text with break
// markers to IDC_DISPL. An empty input only refreshes the class display.
void DoLineBrkDlg(HWND hwndDlg)
{
	TCHAR szText[MAX_CCH];
	enum break_class rgcls[MAX_CCH];
	enum break_action rgbrk[MAX_CCH];

	// fetch the input string
	const int cchText = GetDlgItemText(hwndDlg, IDC_INPUT, szText, MAX_CCH);

	// classify every character and display the classes
	classifyLnBrk(szText, rgcls, cchText);
	ShowLBClasses(hwndDlg, IDC_TYPES, rgcls, cchText);

	if (cchText == 0)
	{
		return;
	}

	// findLineBrk is called repeatedly; each call reports how many
	// characters it handled, and the next call resumes right after them
	int ichDone = 0;
	int cchLeft = cchText;
	while (cchLeft != 0)
	{
		const bool fTailor = (FALSE != IsDlgButtonChecked(hwndDlg, IDC_ALTERNATE));
		ichDone += findLineBrk(rgcls + ichDone, rgbrk + ichDone, cchLeft, fTailor);
		cchLeft = cchText - ichDone;
	}

	// display the text with its break opportunities
	ShowLineBreaks(hwndDlg, IDC_DISPL, szText, rgbrk, cchText);
}

// helper function for dialog
// Insert the display symbol for special character chFormat into the text of
// the IDC_INPUT field at position ichStart.
//
// hwndDlg  - dialog that owns the input field
// chFormat - special character code (translated with VisibleFromChar)
// ichStart - insertion position, 0..length of the current text
// ichEnd   - end of the selection; only used to validate the range, the
//            selected text itself is kept
void InsertChAtSelection(HWND hwndDlg, TCHAR chFormat, int ichStart, int ichEnd)
{
	TCHAR pszInput[MAX_CCH];
	// the result is one character longer than the input, which can itself
	// fill pszInput completely
	TCHAR pszNew[MAX_CCH + 1];

	// read input string
	int cch = GetDlgItemText(hwndDlg, IDC_INPUT, pszInput, MAX_CCH); 

	// no (valid) selection
	if (ichStart < 0 || ichEnd < ichStart || ichStart > cch)
		return;

	// insert ZWSP, ZWNBSP, NBHY, SHY, NBSP,  etc
	// (lstrcpyn counts the terminator in its length argument)
	lstrcpyn(pszNew, pszInput, ichStart + 1);
	pszNew[ichStart] = (TCHAR) VisibleFromChar(chFormat);
	lstrcpyn(pszNew + ichStart + 1, pszInput + ichStart, cch - ichStart + 1);

	// write formatted string
	SetDlgItemText(hwndDlg, IDC_INPUT, pszNew);
}


// The message handler comes in two flavours that share one tail:
//  - WINDOWS_UI > 1: LineBrkDlgProc, a dialog procedure; its head ends inside
//    the switch statement and is continued by the code after the #endif.
//  - otherwise (WINDOWS_UI == 1): SetExplainProc plus LineBrkWndProc, a window
//    procedure whose head is continued by the same shared code.
// The shared part handles WM_COMMAND and closes the switch and the function.
#if WINDOWS_UI > 1
// For private build, this is an ordinary modal dialog
BOOL CALLBACK LineBrkDlgProc(HWND hwndDlg, UINT message, WPARAM wParam, LPARAM lParam)
{
	// selection in the input field, remembered between messages so that the
	// special-character buttons know where to insert
	static int ichStart =0;
	static int ichEnd = 0;

    switch (message)
    {
		case WM_INITDIALOG:
	   		{
				#ifdef _WINDOW_H_
				// center window (requires private header)
				CWindow winDlg(hwndDlg);
				winDlg.CenterAbove(GetWindow(hwndDlg,GW_OWNER));
				#endif

				// verify the table
				verifyTable();

				// initialize dialog 
				SetDlgItemText(hwndDlg, IDC_EXPLAIN, explain);
				return TRUE;
			}
			// ... continued after #endif
#else
// For standalone (WINDOWS_UI == 1) the dialog is run as a main window
// requiring some difference in initialization code and message handling

// EnumChildWindows callback: sends the text passed in lParam to the child
// window whose id is IDC_EXPLAIN and then stops the enumeration.
BOOL CALLBACK SetExplainProc(HWND hwndChild, LPARAM lParam)
{
	LONG id = GetWindowLong(hwndChild, GWL_ID);
	if (id == IDC_EXPLAIN)
	{
		SendMessage(hwndChild, (UINT) WM_SETTEXT,  (WPARAM) 0,  (LPARAM) lParam); 
		return FALSE; // done
	}
	return TRUE; // continue looking
}

LRESULT CALLBACK LineBrkWndProc(HWND hwndDlg, UINT message, WPARAM wParam, LPARAM lParam)
{
	// selection in the input field, remembered between messages so that the
	// special-character buttons know where to insert
	static int ichStart =0;
	static int ichEnd = 0;

    switch (message)
    {
		case WM_SHOWWINDOW:
			// verify the table
			verifyTable();
			// initialize explanation window
			EnumChildWindows(hwndDlg, SetExplainProc,  (LPARAM) explain);
			return 0;
			break;
	   case WM_DESTROY:
		   PostQuitMessage(0);
		   return 0;
#endif
	   // Handling buttons and edit fields
       case WM_COMMAND:
            switch (GET_WM_COMMAND_ID(wParam, lParam)) //Command ID
            {
				// change to input text: run the algorithm
               case IDC_INPUT: 
					// remember the current selection for the insert buttons
					// NOTE(review): the WPARAM/LPARAM cast names are swapped
					// relative to the argument positions, and EM_GETSEL is
					// documented as writing DWORD values -- harmless while the
					// types have the same size, but confirm for 64-bit builds.
					SendDlgItemMessage(hwndDlg, IDC_INPUT, EM_GETSEL, (LPARAM) &ichStart, (WPARAM) &ichEnd);
				    DoLineBrkDlg(hwndDlg);
				    break;

			   case IDC_CMINTABLE:
				   // IDC_ALTERNATE is only enabled while IDC_CMINTABLE is unchecked
				   EnableWindow(GetDlgItem(hwndDlg, IDC_ALTERNATE), !IsDlgButtonChecked(hwndDlg, IDC_CMINTABLE));
				   // fall through
			   case IDC_ALTERNATE:
			   case IDC_HANGULCLUSTER:
				   // an option changed: recompute the display
				   DoLineBrkDlg(hwndDlg);
				   break;				   

				// buttons to enter special character codes
                case IDC_TAB:
				    InsertChAtSelection(hwndDlg, chTB, ichStart, ichEnd);
			        break;
               case IDC_CR:
				   InsertChAtSelection(hwndDlg, chCRx, ichStart, ichEnd);
				   break;
               case IDC_LF:
				   InsertChAtSelection(hwndDlg, chLFx, ichStart, ichEnd);
				   break;
               case IDC_ZWSP:
				   InsertChAtSelection(hwndDlg, chZWSP, ichStart, ichEnd);
				   break;
               case IDC_ZWNBSP:
				   InsertChAtSelection(hwndDlg, chZWNBSP, ichStart, ichEnd);
				   break;
               case IDC_NBSP:
				   InsertChAtSelection(hwndDlg, chNBSP, ichStart, ichEnd);
				   break;
               case IDC_EM:
				   InsertChAtSelection(hwndDlg, chEM, ichStart, ichEnd);
				   break;
               case IDC_ELLIPSIS:
				   InsertChAtSelection(hwndDlg, chELLIPSIS, ichStart, ichEnd);
				   break;
               case IDC_OBJ:
				   InsertChAtSelection(hwndDlg, chOBJ, ichStart, ichEnd);
				   break;

			   case IDC_SHY:
				   InsertChAtSelection(hwndDlg, chSHY, ichStart, ichEnd);
				   break;
			   case IDC_NBHY:
				   InsertChAtSelection(hwndDlg, chNBHY, ichStart, ichEnd);
				   break;
			   #if WINDOWS_UI == 2
				// buttons to close the dialog
			   case IDOK:
				    EndDialog(hwndDlg, IDOK);
				    return TRUE;
               case IDCANCEL:
                    EndDialog(hwndDlg, IDCANCEL);
                    return TRUE;
				#endif
            }
			break;
    }
	// messages not fully handled above: the window procedure defers to the
	// default handler, the dialog procedure reports "not handled"
#if WINDOWS_UI == 1
	return DefWindowProc(hwndDlg, message, wParam, lParam);
#else
    return FALSE ;
#endif
}

#else
#pragma message("Compiling linebrk.cpp for command line version")

// ===== FUNCTIONS FOR COMMAND LINE VERSION ==============================

#include <stdlib.h>
#include <string.h>

// An alternate CharFromTypes array may be needed to use the command 
// line version,

#define MAX_CCH 256

void ShowLBClasses(FILE *f, LPTSTR pszInput, int cch)
{
	TCHAR pszTypes[MAX_CCH * 2];
	for (int ich = 0; ich < cch; ich++)
	{
		pszTypes[ich] = CharFromLbcls1[LBClassFromCh(pszInput[ich])];
	}
	pszTypes[ich++] = '\r';
	pszTypes[ich++] = '\n';
	for ( ; ich < cch * 2 + 2; ich++)
	{
		pszTypes[ich] = CharFromLbcls2[LBClassFromCh(pszInput[ich - cch - 2])];
	}
	pszTypes[ich] = 0;

    fprintf(f, pszTypes);
}							  

void ShowLineBreaks(FILE * f, LPTSTR pszInput, break_action *pbrk, int cch)
{
	TCHAR pszBrkText[2*MAX_CCH];
	for (int ichIn = 0, ichOut = 0; ichIn < cch; ichIn++)
	{
		if (pbrk[ichIn])
		{
			if (pbrk[ichIn] > 1)
			{
				pszBrkText[ichOut++] = pszInput[ichIn];
				pszBrkText[ichOut++] = (TCHAR) 0xa6;
			}
			else
			{
				pszBrkText[ichOut++] = pszInput[ichIn];
			}
		}
		else
		{
			pszBrkText[ichOut++] = pszInput[ichIn];
			pszBrkText[ichOut++] = '|';
		}
	}
	pszBrkText[ichOut] = 0;
    fprintf(f, pszBrkText);
}

// Print a summary of the command line to stdout.
// s - program name to show in the usage line
// Only the options that main() actually recognizes are listed.
void usage(char *s) 
{
    printf("Usage: %s [-verbose] [-cm] strings...\n", s);
    printf("\t-verbose = also print the line break class of each character.\n");
    printf("\t-cm = clear the tailoring flag passed to findLineBrk (set by default).\n");
    printf("\tOptions affect all subsequent arguments.\n");
    printf("\tAll other arguments are interpreted as strings to process.\n");
}

int main(int argc, char** argv) 
{
    int realArg = 0;
    int doCMInTable = 1;
    int beVerbose = 0;

    FILE* f = stdout;

	verifyTable();

    if (argc == 1) 
	{
		usage(argv[0]); exit(0);
    }
    for (int i = 1; i < argc; ++i) 
	{
		if (strcmp(argv[i], "-verbose") == 0) 
		{
			beVerbose = 1;
			continue;
		} 
		else if (strcmp(argv[i], "-cm") == 0) 
		{
			doCMInTable = 0;
			continue;
		} 
		else 
		{
			++realArg;
		}
    
		TCHAR pszInput[MAX_CCH+1];
    
		int cch = strlen(argv[i]);
		if (cch > MAX_CCH) cch = MAX_CCH;
		strncpy(pszInput, argv[i], cch);
    
		pszInput[cch] = 0;
		fprintf(f, "Input    %2d: %s\n", realArg, pszInput);
    

	
		break_class lbcls[MAX_CCH];
		break_action lbrks[MAX_CCH];

		
		// assign line breaking classes
		classifyLnBrk(pszInput, lbcls, cch);

		if (beVerbose) 
		{
			fprintf(f, "LB Classes  : ");
			ShowLBClasses(f, pszInput, cch); fprintf(f, "\n");
		}

		// find the line breaks
		findLineBrk(lbcls, lbrks, cch, false != doCMInTable);

		// write display string
 		fprintf(f, "Output   %2d:", realArg);
		ShowLineBreaks(f, pszInput, lbrks,  cch); fprintf(f, "\n");

    }

    return 0;
}
#endif // WINDOWS_UI

//1 === FIND LINE BREAKS ===================================================

//2 === LINE BREAK SAMPLE CLASSIFICATION =====================================

#define odd(x) ((x) & 1)

#undef IN

// Line Break Character Types

// these have been moved into linebrk.h
// They are repeated here for convenience

// Line break classes. The enumerators OP through JT come first, in this
// order, because they serve as the row and column indices of the pair table
// brkPairs; the classes after JT have no row or column in that table.
enum break_class
{
	// input types
	OP = 0,	// open
	CL,	// close
	QU,	// quotation
	GL,	// glue
	NS,	// no-start
	EX,	// exclamation/interrogation
	SY,	// Syntax (slash)
	IS,	// infix (numeric) separator
	PR,	// prefix
	PO,	// postfix
	NU,	// numeric
	AL,	// alphabetic
	ID,	// ideograph (atomic)
	IN,	// inseparable
	HY,	// hyphen
	BA,	// break after
	BB,	// break before
	B2,	// break both
	ZW,	// ZW space
	CM,	// combining mark
	WJ, // word joiner

	// used for 4.1 pair table
	H2, // Hangul 2 Jamo Syllable
	H3, // Hangul 3 Jamo Syllable
	JL, // Jamo leading consonant
	JV, // Jamo vowel
	JT, // Jamo trailing consonant

	// these are not handled in the pair tables
	SA, // south (east) asian
	SP,	// space
	PS,	// paragraph and line separators
	BK,	// hard break (newline)
	CR, // carriage return
	LF, // line feed
	NL, // next line
	CB, // contingent break opportunity
	SG, // surrogate
	AI, // ambiguous
	XX, // unknown
}; 


// Sample classification of the pseudo alphabet: line break class for each
// character code 0x00..0x7f (128 entries, 16 per row). Codes 0x01..0x0d are
// the special control codes (chZWSP .. chCRx).
// NOTE(review): ',' (0x2c) is classified IN and '_' (0x5f) IS here, whereas
// the help text lists ',' as "Separator" and '_' as "Leaders" -- confirm
// which of the two is intended.
enum break_class LnBrkClassFromChar[]  =
{		
					// CB entries are kept here; classifyLnBrk maps CB to AL

//  0	1	2	3	4	5	6	7	8	9	a	b	c	d	e	f
	AL, ZW, GL, GL, BA, GL,	AL, B2, IN, BA, LF, CB, AL, CR, AL, AL, // 00-0f
	AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, // 10-1f

//  ' '  !   "   #   $   %   &   '   (   )   *   +   ,   -   .    /  
	SP, EX, QU, IN, PR, PO, BB, QU, OP, CL, BA, PR, IN, HY, IN, SY, // 20-2f
//   0   1   2   3   4   5   6   7   8   9   :   ;   <   =   >   ?
	NU, NU, NU, NU, NU, NU, NU, NU, NU,	NU,	NS,	AL,	AL,	GL, AL,	EX,	// 30-3f

//   @,  A  B   C   D   E   F   G   H   I   J   K   L   M   N   O  
	CB, ID,	ID, ID,	ID, ID,	ID, GL,	H3, ID,	ID, ID,	JL, ID,	ID, ID,	// 40-4f
//   P   Q   R   S   T   U   V   W   X   Y   Z   [   \   ]   ^   _
	ID, ID, ID,	ID, JT,	ID, JV,	WJ, XX,	SA, ZW,	OP,	AL,	CL,	AL,	IS,	// 50-5f
//   `   a   b   c   d   e   f   g   h   i   j   k   l   m   n   o
	CM, AL,	AL, AL,	AL, AL,	AL, AL,	H2, AL,	AL, AL,	AL, AL,	AL, AL,	// 60-6f
//   p   q   r   s   t   u   v   w   x   y   z   {   |   }   ~  DEL
	AL, AL, AL,	AL, AL,	AL, AL,	AL, AL,	AL, AL,	OP,	AL,	CL,	AL,	SA,	// 70-7f
};

// Return the sample line break class of a single character.
//
// ch is first translated with CharFromVisible, so that the display symbols
// of the special characters are classified like the control codes they stand
// for. Codes covered by LnBrkClassFromChar (0x00..0x7f) are looked up there;
// everything else is reported as XX (unknown).
enum break_class LBClassFromCh(TCHAR ch)
{
	// work on an int: if TCHAR is a signed character type, codes above 0x7f
	// arrive as negative values and must not be used as an index
	const int code = CharFromVisible(ch);

	// the table has 128 entries, so 0x7f itself is a valid index
	if (code < 0 || code > 0x7f)
		return XX;
	return LnBrkClassFromChar[code];
}
/*---------------------------------------------------------------------------
	Function: classify
	
    Determines the character classes for all following
	passes of the algorithm

	Input: Text string
		   Character count

	Output: Array of linebreak classes	

----------------------------------------------------------------------------*/
// Assign a line break class to every character of the text.
//
// pszText - the input characters
// pcls    - receives one class per character; must have room for cch entries
// cch     - number of characters
//
// Classes XX, AI and CB are replaced by AL, and NL by BK, before they are
// stored. Returns the number of characters classified.
int classifyLnBrk(const LPTSTR pszText, enum break_class * pcls,  int cch)
{
	// declared outside the loop because it is the return value; standard C++
	// ends the scope of a for-declared variable with the loop
	int ich;
	for (ich = 0; ich < cch; ich++)
	{
		pcls[ich] = LBClassFromCh(pszText[ich]);

		// map unknown, ambiguous and contingent to AL by default
		if (pcls[ich] == XX || pcls[ich] == AI || pcls[ich] == CB)
			pcls[ich] = AL;

		// map NL to BK as there's no difference
		if (pcls[ich] == NL)
			pcls[ich] = BK;
	}
	return ich;
}

//2 === LINE BREAK DEFINITIONS ===================================================

// Define some short-cuts for the table
// NOTE: from the definition of XX below onwards, the identifier XX expands
// to PROHIBITED_BRK, so the break_class enumerator of the same name can no
// longer be referred to by name in the rest of the file.
#define oo DIRECT_BRK				// '_' break allowed
#define SS INDIRECT_BRK				// '%' only break across space (aka 'indirect break' below)
#define cc COMBINING_INDIRECT_BRK	// '#' indirect break for combining marks
#define CC COMBINING_PROHIBITED_BRK	// '@' prohibited break for combining marks
#define XX PROHIBITED_BRK			// '^' no break allowed
#define xS HANGUL_SPACE_BRK			// break allowed, except when spaces are used with Hangul

// xS not yet assigned in the table below

//2 === LINE BREAK PAIR TABLE ===================================================
// Pair table: brkPairs[before][after] is the break action between a
// character of class 'before' and a following character of class 'after'.
// Rows and columns cover the classes OP..JT only (JT+1 entries each).
// The entries use the two-letter shortcuts defined above the table.
enum break_action brkPairs[][JT+1]=
{   //                ---     'after'  class  ------
	//		1	2	3	4	5	6	7	8	9  10  11  12  13  14  15  16  17  18  19  20  21   22  23  24  25  26  
	//     OP, CL, QU, GL, NS, EX, SY, IS, PR, PO, NU, AL, ID, IN, HY, BA, BB, B2, ZW, CM, WJ,  H2, H3, JL, JV, JT, = after class
	/*OP*/ XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, CC, XX,  XX, XX, XX, XX, XX, // OP open
	/*CL*/ oo, XX, SS, SS, XX, XX, XX, XX, oo, SS, oo, oo, oo, oo, SS, SS, oo, oo, XX, cc, XX,  oo, oo, oo, oo, oo, // CL close
	/*QU*/ XX, XX, SS, SS, SS, XX, XX, XX, SS, SS, SS, SS, SS, SS, SS, SS, SS, SS, XX, cc, XX,  SS, SS, SS, SS, SS, // QU quotation
	/*GL*/ SS, XX, SS, SS, SS, XX, XX, XX, SS, SS, SS, SS, SS, SS, SS, SS, SS, SS, XX, cc, XX,  SS, SS, SS, SS, SS, // GL glue
	/*NS*/ oo, XX, SS, SS, SS, XX, XX, XX, oo, oo, oo, oo, oo, oo, SS, SS, oo, oo, XX, cc, XX,  oo, oo, oo, oo, oo, // NS no-start
	/*EX*/ oo, XX, SS, SS, SS, XX, XX, XX, oo, oo, oo, oo, oo, oo, SS, SS, oo, oo, XX, cc, XX,  oo, oo, oo, oo, oo, // EX exclamation/interrogation
	/*SY*/ oo, XX, SS, SS, SS, XX, XX, XX, oo, oo, SS, oo, oo, oo, SS, SS, oo, oo, XX, cc, XX,  oo, oo, oo, oo, oo, // SY Syntax (slash)
	/*IS*/ oo, XX, SS, SS, SS, XX, XX, XX, oo, oo, SS, SS, oo, oo, SS, SS, oo, oo, XX, cc, XX,  oo, oo, oo, oo, oo, // IS infix (numeric) separator
	/*PR*/ SS, XX, SS, SS, SS, XX, XX, XX, oo, oo, SS, SS, SS, oo, SS, SS, oo, oo, XX, cc, XX,  SS, SS, SS, SS, SS, // PR prefix
	/*PO*/ oo, XX, SS, SS, SS, XX, XX, XX, oo, oo, oo, oo, oo, oo, SS, SS, oo, oo, XX, cc, XX,  oo, oo, oo, oo, oo, // PO postfix
	/*NU*/ oo, XX, SS, SS, SS, XX, XX, XX, oo, SS, SS, SS, oo, SS, SS, SS, oo, oo, XX, cc, XX,  oo, oo, oo, oo, oo, // NU numeric
	/*AL*/ oo, XX, SS, SS, SS, XX, XX, XX, oo, oo, SS, SS, oo, SS, SS, SS, oo, oo, XX, cc, XX,  oo, oo, oo, oo, oo, // AL alphabetic
	/*ID*/ oo, XX, SS, SS, SS, XX, XX, XX, oo, SS, oo, oo, oo, SS, SS, SS, oo, oo, XX, cc, XX,  oo, oo, oo, oo, oo, // ID ideograph (atomic)
	/*IN*/ oo, XX, SS, SS, SS, XX, XX, XX, oo, oo, oo, oo, oo, SS, SS, SS, oo, oo, XX, cc, XX,  oo, oo, oo, oo, oo, // IN inseparable
	/*HY*/ oo, XX, SS, SS, SS, XX, XX, XX, oo, oo, SS, oo, oo, oo, SS, SS, oo, oo, XX, cc, XX,  oo, oo, oo, oo, oo, // HY hyphen
	/*BA*/ oo, XX, SS, SS, SS, XX, XX, XX, oo, oo, oo, oo, oo, oo, SS, SS, oo, oo, XX, cc, XX,  oo, oo, oo, oo, oo, // BA break after 
	/*BB*/ SS, XX, SS, SS, SS, XX, XX, XX, SS, SS, SS, SS, SS, SS, SS, SS, SS, SS, XX, cc, XX,  SS, SS, SS, SS, SS, // BB break before 
	/*B2*/ oo, XX, SS, SS, SS, XX, XX, XX, oo, oo, oo, oo, oo, oo, SS, SS, oo, XX, XX, cc, XX,  oo, oo, oo, oo, oo, // B2 break either side, but not pair
	/*ZW*/ oo, oo, oo, oo, oo, oo, oo, oo, oo, oo, oo, oo, oo, oo, oo, oo, oo, oo, XX, oo, oo,  oo, oo, oo, oo, oo, // ZW zero width space
	/*CM*/ oo, XX, SS, SS, SS, XX, XX, XX, oo, oo, SS, SS, oo, SS, SS, SS, oo, oo, XX, cc, XX,  oo, oo, oo, oo, oo, // CM combining mark
	/*WJ*/ SS, XX, SS, SS, SS, XX, XX, XX, SS, SS, SS, SS, SS, SS, SS, SS, SS, SS, XX, cc, XX,  SS, SS, SS, SS, SS, // WJ word joiner
																							    
	/*H2*/ oo, XX, SS, SS, SS, XX, XX, XX, oo, SS, oo, oo, oo, SS, SS, SS, oo, oo, XX, cc, XX,  oo, oo, oo, SS, SS, // Hangul 2 Jamo syllable
	/*H3*/ oo, XX, SS, SS, SS, XX, XX, XX, oo, SS, oo, oo, oo, SS, SS, SS, oo, oo, XX, cc, XX,  oo, oo, oo, oo, SS, // Hangul 3 Jamo syllable
	/*JL*/ oo, XX, SS, SS, SS, XX, XX, XX, oo, SS, oo, oo, oo, SS, SS, SS, oo, oo, XX, cc, XX,  SS, SS, SS, SS, oo, // Jamo Leading Consonant
	/*JV*/ oo, XX, SS, SS, SS, XX, XX, XX, oo, SS, oo, oo, oo, SS, SS, SS, oo, oo, XX, cc, XX,  oo, oo, oo, SS, SS, // Jamo Vowel
	/*JT*/ oo, XX, SS, SS, SS, XX, XX, XX, oo, SS, oo, oo, oo, SS, SS, SS, oo, oo, XX, cc, XX,  oo, oo, oo, oo, SS, // Jamo Trailing Consonant
	
};


//2 === FIND LINE BREAKS =======================================================

// placeholder function for complex break analysis
// cls - resolved line break class, may differ from pcls[0]
// pcls - pointer to array of line breaking classes (input)
// pbrk - pointer to array of line breaking opportunities (output)
//
// Placeholder: counts the run of SA (complex script) characters at the
// start of pcls without reporting any break opportunities.
//
// cls  - resolved class of the 'before' character (unused by the placeholder)
// pcls - line break classes, starting at the position to examine
// pbrk - break opportunities for the same positions (not written here)
// cch  - number of entries available in pcls / pbrk
//
// Returns the number of leading entries of pcls that are SA (0 if cch is 0
// or the first entry is not SA).
int findComplexBreak(enum break_class cls, enum break_class *pcls, enum break_action *pbrk, int cch)
{
        if (!cch)
            return 0;

        // declared outside the loop because it is the return value; standard
        // C++ ends the scope of a for-declared variable with the loop
        int ich;
        for (ich = 0; ich < cch; ich++) {

            // .. do complex break analysis here
            // and report any break opportunities in pbrk ..

            if (pcls[ich] != SA)
                    break;
            }
        return ich;
}

/* already declared at the top:
enum break_action {
       DIRECT_BRK = 0,             // _ in table
       INDIRECT_BRK,               // % in table
       COMBINING_INDIRECT_BRK,     // # in table
       COMBINING_PROHIBITED_BRK,   // @ in table
       PROHIBITED_BRK,             // ^ in table
	   EXPLICIT_BRK,               // ! in rules
	   HANGUL_SPACE_BRK };         // not yet used in the table
*/

// handle spaces separately, all others by table
// pcls - pointer to array of line breaking classes (input)
// pbrk - pointer to array of line break opportunities (output)
// cch - number of elements in the arrays ("count of characters") (input)
// ich - current index into the arrays (variable) (returned value)
// cls - current resolved line break class for 'before' character (variable)
int findLineBrk(enum break_class *pcls, enum break_action *pbrk, int cch, bool fTailorSPCM)
{
    if (!cch) 
        return 0;

    enum break_class cls = pcls[0];

	if (cls == LF)
		cls = BK;

    // loop over all pairs in the string up to a hard break
    for (int ich = 1; (ich < cch) && (cls != BK) && (cls != CR || pcls[ich] == LF); ich++) {

        // handle spaces explicitly
        if (pcls[ich] == SP) {
            pbrk[ich-1] = PROHIBITED_BRK;   // apply rule LB 4: � SP
            continue;						// do not update cls
        }

		if (pcls[ich] == BK || pcls[ich] == LF) {
			pbrk[ich-1] = PROHIBITED_BRK;
			cls = BK;
			continue;
		}

		if (pcls[ich] == CR)
		{
			pbrk[ich-1] = PROHIBITED_BRK;
			cls = CR;
			continue;
		}

        // handle complex scripts in a separate function
        if (pcls[ich] == SA) {
            ich += findComplexBreak(cls, &pcls[ich-1], &pbrk[ich-1], cch - (ich-1));
            if (ich < cch)
                cls = pcls[ich];
            continue;
        }

        ASSERT(cls < SP);
        ASSERT(pcls[ich] < SP);

        // lookup pair table information in brkPairs[before, after];
        enum break_action brk = brkPairs[cls][pcls[ich]];

        pbrk[ich-1] = brk;							   // save break action in output array

        if (brk == INDIRECT_BRK) {					   // resolve indirect break
			if (pcls[ich - 1] == SP)				   // if context is A SP * B
				pbrk[ich-1] = INDIRECT_BRK;			   //		break opportunity
			else									   // else
				pbrk[ich-1] = PROHIBITED_BRK;		   //		no break opportunity
		} else if (brk == COMBINING_PROHIBITED_BRK) {  // this is the case OP SP* CM
            pbrk[ich-1] = COMBINING_PROHIBITED_BRK;    // no break allowed
            if (pcls[ich-1] != SP)
                continue;							   // apply rule 7b: X CM* -> X
        } else if (brk == COMBINING_INDIRECT_BRK) {    // resolve combining mark break
            pbrk[ich-1] = PROHIBITED_BRK;			   // don't break before CM
            if (pcls[ich-1] == SP){
                if (!fTailorSPCM)					    // untailored:
                    pbrk[ich-1] = COMBINING_INDIRECT_BRK;    // apply rule SP � 
                else									// optionally, keep SP CM together
                {
                    pbrk[ich-1] = PROHIBITED_BRK;
                    if (ich > 1)
                        pbrk[ich-2] = ((pcls[ich - 2] == SP) ? INDIRECT_BRK : DIRECT_BRK);
                }
            } else										// apply rule 7b: X CM * -> X
                continue;								// don't update cls
        }
        cls = pcls[ich];								// save cls of current character
    }
    // always break at the end
    pbrk[ich-1] = EXPLICIT_BRK;

    return ich;
}

//1 === VERIFY PAIR TABLE =======================================================
#ifdef VERIFY_PAIR_TABLE 

// === MAP LB CLASSES TO ALIASES ==========================================================

// --- names -----
// Numeric tags for the three pieces of information kept per class.
// NOTE(review): presumably these select the id / short / long field of an
// LBAlias entry; they are not referenced in this part of the file.
#define propID    0
#define propShort 1
#define propLong  2

// One line break class together with its two alias spellings.
struct LBAlias
{
	int    id;        // line break class value the aliases belong to
	char * pszShort;  // short alias, e.g. "AL"
	char * pszLong;   // long alias, e.g. "Alphabetic"
};

// Alias table: one entry per line break class, listed alphabetically by
// short alias. Entries are looked up by id, so the order is not significant.
struct LBAlias LBAliases[] = {
//  { AI, "AI", "Ambiguous" },
    { AL, "AL", "Alphabetic" },
    { B2, "B2", "BreakBoth" },
    { BA, "BA", "BreakAfter" },
    { BB, "BB", "BreakBefore" },
    { BK, "BK", "MandatoryBreak" },
    { CB, "CB", "ContingentBreak" },
    { CL, "CL", "ClosePunctuation" },
    { CM, "CM", "CombiningMark" },
    { CR, "CR", "CarriageReturn" },
    { EX, "EX", "Exclamation" },
    { GL, "GL", "Glue" },
    { H2, "H2", "H2" },
    { H3, "H3", "H3" },
    { HY, "HY", "Hyphen" },
    { ID, "ID", "Ideographic" },
    { IN, "IN", "Inseparable" },
    { IS, "IS", "InfixNumeric" },
    { JL, "JL", "JL" },
    { JT, "JT", "JT" },
    { JV, "JV", "JV" },
    { LF, "LF", "LineFeed" },
    { NL, "NL", "NextLine" },
    { NS, "NS", "Nonstarter" },
    { NU, "NU", "Numeric" },
    { OP, "OP", "OpenPunctuation" },
    { PO, "PO", "PostfixNumeric" },
    { PR, "PR", "PrefixNumeric" },
    { QU, "QU", "Quotation" },
    { SA, "SA", "ComplexContext" },
    { SG, "SG", "Surrogate" },
    { SP, "SP", "Space" },
    { SY, "SY", "BreakSymbols" },
    { WJ, "WJ", "WordJoiner" },
    { XX, "XX", "Unknown" },
    { ZW, "ZW", "ZWSpace" },
};

// Sample character (code point and character name) for each line break
// class that has a row/column in the pair table. The array is indexed by
// the class value, so the entries must stay in the order OP .. JT.
// The strings are used as tooltip text in the generated HTML table.
char * pszSampleCharsFromLBClass[] = {
    /*OP*/   "U+0028 LEFT PARENTHESIS",
    /*CL*/   "U+0029 RIGHT PARENTHESIS",
    /*QU*/   "U+0022 QUOTATION MARK",
    /*GL*/   "U+00A0 NO-BREAK SPACE",
    /*NS*/   "U+30A1 KATAKANA LETTER SMALL A",
    /*EX*/   "U+0021 EXCLAMATION MARK",
    /*SY*/   "U+002F SOLIDUS",
    /*IS*/   "U+002C COMMA",
    /*PR*/   "U+0024 DOLLAR SIGN",
    /*PO*/   "U+0025 PERCENT SIGN",
    /*NU*/   "U+0030 DIGIT ZERO",
    /*AL*/   "U+0023 NUMBER SIGN",
    /*ID*/   "U+2E80 CJK RADICAL REPEAT",
    /*IN*/   "U+2024 ONE DOT LEADER",
    /*HY*/   "U+002D HYPHEN-MINUS",
    /*BA*/   "U+2010 HYPHEN",
    /*BB*/   "U+00B4 ACUTE ACCENT",
    /*B2*/   "U+2014 EM DASH",
    /*ZW*/   "U+200B ZERO WIDTH SPACE",
    /*CM*/   "U+0301 COMBINING ACUTE ACCENT",   // was mislabelled as U+0302 (the circumflex)
    /*WJ*/   "U+2060 WORD JOINER",
    /*H2*/   "U+AC00 HANGUL SYLLABLE GA",
    /*H3*/   "U+AC01 HANGUL SYLLABLE GAG",
    /*JL*/   "U+1100 HANGUL CHOSEONG KIYEOK",
    /*JV*/   "U+1161 HANGUL JUNGSEONG A",
    /*JT*/   "U+11A8 HANGUL JONGSEONG KIYEOK",
};

// the above list is limited to LB classes shown in Tables 2 and 3 of UAX#14

char * pszLBAliasFromID(int id, bool fLong)
{
	for (int i = 0; i < sizeof LBAliases/ sizeof(struct LBAlias); i++)
	{
		if (LBAliases[i].id == id)
			if (fLong)
				return LBAliases[i].pszLong;
			else 
				return LBAliases[i].pszShort;
	}
	return ("");
}

// Short alias (e.g. "AL") for the line break class id; empty string when
// the class is not in the alias table.
char * pszShortFromLbclass(int id)
{
	const bool fLong = false;
	return pszLBAliasFromID(id, fLong);
}
// Long alias (e.g. "Alphabetic") for the line break class id; empty string
// when the class is not in the alias table.
char * pszLongFromLbclass(int id)
{
	const bool fLong = true;
	return pszLBAliasFromID(id, fLong);
}

// --- HELPER CLASS for PUBLIC BUILDS
#ifndef _CMAPFILE_H_
// Minimal stand-in for the text file class of the private build: opens a
// file for writing and appends narrow strings to it.
//
// The stream is not closed by this class. NOTE(review): closing it in a
// destructor would also need copy control, because table_verify initialises
// its member from a temporary CTextFile.
class CTextFile
{
	public:
		// pszFilename - name of the file to create or truncate
		// fIgnored    - accepted for signature compatibility, not used
		// If the file cannot be opened, _fp is null and all output
		// calls become no-ops.
		CTextFile(wchar_t *pszFilename, bool fIgnored)
			: _fp(_wfopen(pszFilename, L"w"))	// store in the member, not in a local
		{
			(void) fIgnored;
		}
		// write psz without a line terminator
		void PutString(char * psz)
		{
			if (_fp)
				fputs(psz, _fp);
		}
		// write psz followed by a newline
		void PutLine(char * psz)
		{
			if (_fp)
			{
				fputs(psz, _fp);
				fputs("\n", _fp);
			}
		}
		FILE * _fp;		// output stream, null if the open failed
};
#endif

//=== TABLE VERIFICATION AND HTML GENERATION ===
// Checks the pair table brkPairs against the line break rules and, as a
// side effect, writes the table as HTML to VERIFICATION_FILE.
class table_verify
{
public: 
	// Opens the output file. The member is initialised directly from the
	// constructor arguments, so no temporary CTextFile has to be copied.
	table_verify()
		: out(VERIFICATION_FILE, true)
	{

	}
	// run all checks and write the complete table
	void verifyTable();
private:
	// Pair checks: each asserts the expected table entry for (cb, ca) and,
	// when pszRule is non-null, writes one table cell with the matching
	// symbol, using pszRule as the cell's title.
	void no_break_pairs_with_space(enum break_class cb, enum break_class ca, char * pszRule = 0);
	// (declared without the class-name qualification, which is not allowed
	// on a declaration inside the class)
	void no_break_pairs_with_space_for_combining(enum break_class cb, enum break_class ca,  char * pszRule);
	void no_break_without_space_for_combining(enum break_class cb, enum break_class ca, char * pszRule = 0);
	void no_break_pair(enum break_class cb, enum break_class ca, char * pszRule = 0);
	void break_pair(enum break_class cb, enum break_class ca, char * pszRule = 0);
	// HTML output helpers
	void init_table();
	void terminate_table();
	void init_row(char * pszHeader = 0, char * pszTitle = 0);
	void terminate_row();
	void init_col(char * pszTitle);
	void terminate_col();
	void dotitle(char * pszTitle);
	CTextFile out;		// destination of the generated HTML
};

// Assert that the table entry for (cb, ca) is XX. When a rule label is
// given, also emit a "^" cell that carries the label as its title.
void table_verify::no_break_pairs_with_space(enum break_class cb, enum break_class ca, char * pszRule)
{
	ASSERT(brkPairs[cb][ca] == XX);

	if (!pszRule)
		return;

	init_col(pszRule);
	out.PutString("^");
	terminate_col();
}
// Assert that the table entry for (cb, ca) is CC. When a rule label is
// given, also emit a "@" cell that carries the label as its title.
void table_verify::no_break_pairs_with_space_for_combining(enum break_class cb, enum break_class ca,  char * pszRule)
{
	ASSERT(brkPairs[cb][ca] == CC);

	if (!pszRule)
		return;

	init_col(pszRule);
	out.PutString("@");
	terminate_col();
}
// Assert that the table entry for (cb, ca) is cc. When a rule label is
// given, also emit a "#" cell that carries the label as its title.
void table_verify::no_break_without_space_for_combining(enum break_class cb, enum break_class ca,  char * pszRule)
{
	ASSERT(brkPairs[cb][ca] == cc);

	if (!pszRule)
		return;

	init_col(pszRule);
	out.PutString("#");
	terminate_col();
}
// Assert that the table entry for (cb, ca) is SS. When a rule label is
// given, also emit a "%" cell that carries the label as its title.
void table_verify::no_break_pair(enum break_class cb, enum break_class ca,  char * pszRule)
{
	ASSERT(brkPairs[cb][ca] == SS);

	if (!pszRule)
		return;

	init_col(pszRule);
	out.PutString("%");
	terminate_col();
}
// Assert that the table entry for (cb, ca) is oo. When a rule label is
// given, also emit a "_" cell that carries the label as its title.
void table_verify::break_pair(enum break_class cb, enum break_class ca,  char * pszRule)
{
	ASSERT(brkPairs[cb][ca] == oo);

	if (!pszRule)
		return;

	init_col(pszRule);
	out.PutString("_");
	terminate_col();
}

// Write the opening tag of the HTML pair table.
void table_verify::init_table()
{
	char szOpenTable[] = "  <table class=\"pair\" cellSpacing=\"0\" width=\"88%\" border=\"1\">";
	out.PutLine(szOpenTable);
}

// Write the closing tag of the HTML pair table.
void table_verify::terminate_table()
{
	char szCloseTable[] = "  </table>";
	out.PutLine(szCloseTable);
}
// Start a table row and write its first (header) cell.
// pszHeader - HTML content of the header cell; when null the cell holds a
//             single non-breaking space so that it is not empty
// pszTitle  - text for the cell's title attribute, may be null
void table_verify::init_row(char * pszHeader, char * pszTitle)
{
	out.PutLine("    <tr>");
	init_col(pszTitle);
	if (pszHeader)
		out.PutString(pszHeader);
	else
		out.PutString("&nbsp;");	// character reference needs its terminating ';'
	terminate_col();
}
// Write the closing tag of the current table row.
void table_verify::terminate_row()
{
	char szCloseRow[] = "    </tr>";
	out.PutLine(szCloseRow);
}

// Write a title attribute with pszTitle as its value. The text is written
// as given, without any escaping.
void table_verify::dotitle(char * pszTitle)
{
	char szAttrOpen[]  = "title=\"";
	char szAttrClose[] = "\"";

	out.PutString(szAttrOpen);
	out.PutString(pszTitle);
	out.PutString(szAttrClose);
}

// Write the opening tag of a table cell; the title attribute is added only
// when pszTitle is non-null.
void table_verify::init_col(char * pszTitle)
{
	char szTagStart[] = "      <th ";
	char szTagEnd[]   = ">";

	out.PutString(szTagStart);
	if (pszTitle != 0)
	{
		dotitle(pszTitle);
	}
	out.PutString(szTagEnd);
}
// Write the closing tag of the current table cell and end the line.
void table_verify::terminate_col()
{
	char szCloseCell[] = "</th>";
	out.PutLine(szCloseCell);
}

#define nextclass(x) (x = (enum break_class)(x + 1))

void table_verify::verifyTable()
{
	// Running this code will stop excecution with an assert whenever
	// an entry in the pair table does not match the statement of the
	// rules of the line break algorithm below.

	// At the same time, the code produces an HTML version of the LB
	// pair table in a format that matches that of UAX#14, except it
	// includes the Hangul and Jamo rows and and columns.

	// Rules that are not handled in the pair table, are not verified
	// for example 1, 2, 3a, 3b, 3c. Rules 7b and 7c are handled as
	// described below.

	char szTitle[100];
	char szHeader[100];
	init_table();

	init_row();
	for (enum break_class ca = OP; ca <=JT; nextclass(ca) )
	{
		strcpy(szTitle, pszSampleCharsFromLBClass[ca]);
		strcat(szTitle, "; ");
		strcat(szTitle, pszShortFromLbclass(ca));
		strcat(szTitle, "=");
		strcat(szTitle, pszLongFromLbclass(ca));
		strcpy(szHeader, "<a class=\"charclass\" href=\"#");
		strcat(szHeader, pszShortFromLbclass(ca));
		strcat(szHeader, "\">");
		strcat(szHeader, pszShortFromLbclass(ca));
		strcat(szHeader, "</a>");
		init_col(szTitle);
		out.PutString(szHeader);
		terminate_col();
	}
	terminate_row();
	for (enum break_class cb = OP; cb <= JT; nextclass(cb) )
	{
		strcpy(szTitle, pszSampleCharsFromLBClass[cb]);
		strcpy(szHeader, "<a class=\"charclass\" href=\"#");
		strcat(szHeader, pszShortFromLbclass(cb));
		strcat(szHeader, "\">");
		strcat(szHeader, pszShortFromLbclass(cb));
		strcat(szHeader, "</a>");
		init_row(szHeader, szTitle);

		for (enum break_class ca = OP; ca <=JT; nextclass(ca) )
		{
			/**
			// LB 1  Assign a line breaking class to each code point of the input. Resolve AI, CB, SA, SG, XX
			// LB 2a  Never break at the start of text.
			// 2a: � sot
			// LB 2b  Always break at the end of text.
			// 2b: ! eot
			// LB 3a  Always break after hard line breaks (but never between CR and LF).
			// 3a: BK !
			// LB 3b  Treat CR followed by LF, as well as CR, LF and NL as hard line breaks.
			if (cb == CR && cb == LF)
				no_break(cb, ca); // 3b: CR � LF
			else if (cb == CR || cb == LF || cb == NL)
				must_break_after(cb); // 3b: ( CR | LF | NL ) !
			//LB 3c  Do not break before hard line breaks.
			else if (ca == BK || ca == CR || ca == LF || ca == NL)
				no_break_pair(ca); // 3c: � ( BK | CR | LF | NL )

			// LB 4  Do not break before spaces or zero-width space.
			else**/ if (ca == SP || ca == ZW)
				no_break_pairs_with_space(cb, ca, "4: � ( SP | ZW )"); // 4: � ( SP | ZW )
			// LB 5  Break after zero-width space.
			else if (cb == ZW)
				break_pair(cb, ca, "5: ZW �"); // 5: ZW �
			// LB 7b  Do not break a combining character sequence;  treat it as if it has the LB class of the base character in all of the following rules.
			// Treat X CM* as if it were X.
			// Where X is any line break class except SP, BK, CR, LF, NL or ZW. 
			// For a pair table implementation LB 7b can be restated equivalently as: X CM* -> X +
			// This is handled by putting X � CM (which includes CM � CM) into the pair table, and
			// changing the break_action to account for the additional rule that
			// CM takes on the class of X for later line break
			else if ((cb == OP) && (ca == CM))
				no_break_pairs_with_space_for_combining(cb, ca, "7b: X CM* -> X ; 9: OP SP * � ; 4: � ( SP | ZW )"); // 7b: X CM* -> X ; 9: OP SP * x  ; 4: � ( SP | ZW )
			else if ((cb != SP && cb != BK && cb != CR && cb != LF && cb != NL && cb != ZW) && (ca == CM))
				no_break_without_space_for_combining(cb, ca, "7b: X CM* -> CM ; 20: ALL �"); // 7b: X CM* -> CM ; 20: ALL � 
			//LB 7c  Treat any remaining combining mark as AL.
			// carried out by rewriting all rules below that use AL
			// LB 8  Do not break before �]� or �!� or �;� or �/�, even after spaces.
			else if(ca == CL || ca == EX || ca == IS || ca == SY )
				no_break_pairs_with_space(cb, ca, "8: � (CL | EX | IS | SY ) ; 4: � ( SP | ZW )"); // 8: � (CL | EX | IS | SY ) ; 4: � ( SP | ZW )
			// LB 9  Do not break after �[�, even after spaces.
			else if (cb == OP)
				no_break_pairs_with_space(cb, ca, "9: OP SP* � ; 4: � ( SP | ZW )"); // 9: OP SP* � ; 4: � ( SP | ZW )
			// LB 10  Do not break within ��[�, , even with intervening spaces.
			else if (cb == QU && ca == OP)
				no_break_pairs_with_space(cb, ca, "10: QU SP* � OP ; 4: � ( SP | ZW )"); // 10: QU SP* � OP ; 4: � ( SP | ZW )
			// LB 11  Do not break within �]h�, even with intervening spaces.
			else if (cb == CL && ca == NS)
				no_break_pairs_with_space(cb, ca, "11: CL SP* � NS ; 4: � ( SP | ZW )"); // 11: CL SP* � NS ; 4: � ( SP | ZW )
			// LB 11a  Do not break within ����, even with intervening spaces.
			else if (cb == B2 && ca == B2)
				no_break_pairs_with_space(cb, ca, "11a: B2 � B2; ; 4: � ( SP | ZW )"); // 11a: B2 � B2; ; 4: � ( SP | ZW )
			// LB 11b  Do not break before or after WORD JOINER and related characters.
			else if (ca == WJ)
				no_break_pairs_with_space(cb, ca, "11b: � WJ; ; 4: � ( SP | ZW )"); // 11b: � WJ ; 4: � ( SP | ZW )
			else if (cb == WJ)
				no_break_pair(cb, ca, "11b: WJ � ; 4: � ( SP | ZW ) ; 12: SP �"); // 11b: WJ � ; 4: � ( SP | ZW ) ; 12: SP �
			// LB 12  Break after spaces.
			//else if (cb == SP)
			// break_pair(cb, ca, "12: SP �"); // 12: SP �
			// ** handled by allowing rule 12 below
			// LB 13  Do not break before or after NBSP and related characters.
			else if (ca == GL)
				no_break_pair(cb, ca, "13: � GL ; 4: � ( SP | ZW ) ; 12: SP �"); // 13: � GL ; 4: � ( SP | ZW ) ; 12: SP �
			else if (cb == GL)
				no_break_pair(cb, ca, "13: GL � ; 4: � ( SP | ZW ) ; 12: SP �"); // 13: GL � ; 4: � ( SP | ZW ) ; 12: SP �
			// LB 14  Do not break before or after ���.
			else if (ca == QU)
				no_break_pair(cb, ca, "14: � QU ; 4: � ( SP | ZW ) ; 12: SP �"); // 14: � QU ; 4: � ( SP | ZW ) ; 12: SP �
			else if (cb == QU)
				no_break_pair(cb, ca, "14: QU � ; 4: � ( SP | ZW ) "); // 14: QU � ; 4: � ( SP | ZW ) 
			else if (ca == CB)
				break_pair(cb, ca, "13: � CB; ; 12: SP �"); // 13: � CB; ; 12: SP �  
			else if (cb == CB)
				break_pair(cb, ca, "13: CB � ; 4: � ( SP | ZW ) "); // 13: CB � ; 4: � ( SP | ZW ) 
			// LB 15  Do not break before hyphen-minus, other hyphens, fixed-width spaces, small kana and other non-starters, or after acute accents.
			else if (ca == BA || ca == HY || ca == NS)
				no_break_pair(cb, ca, "15: � BA | HY | NS ; 4: � ( SP | ZW ) ; 12: SP �"); // 15:� BA | HY | NS ; 4: � ( SP | ZW ) ; 12: SP �
			else if (cb == BB)
				no_break_pair(cb, ca, "15: BB � ; 4: � ( SP | ZW ) ; 12: SP �"); // 15: BB � ; 4: � ( SP | ZW ) ; 12: SP �
			// LB 16  Do not break between two ellipses, or between letters or numbers and ellipsis.
			else if (cb == CM && ca == IN)
				no_break_pair(cb, ca, "7c: CM->AL ; 16: CM * IN )  ; 4: � ( SP | ZW ) ; 12: SP �"); // 7c: CM->AL ; 16: CM * IN )  ; 4: � ( SP | ZW ) ; 12: SP �
			else if ((cb == AL || cb == ID || cb == IN || cb == NU) && ca == IN)
				no_break_pair(cb, ca, "16:( AL | ID | IN | NU )� IN  ; 4: � ( SP | ZW ) ; 12: SP �"); // 16:( AL | ID | IN | NU )� IN  ; 4: � ( SP | ZW ) ; 12: SP �
			// LB 17  Do not break within �a9�, �3a�, or �H%�.
			else if (cb == ID && ca == PO)
				no_break_pair(cb, ca, "17: ID � PO ; 4: � ( SP | ZW ) ; 12: SP �"); // 17: ID � PO ; 4: � ( SP | ZW ) ; 12: SP �
			else if (cb == AL && ca == NU)
				no_break_pair(cb, ca, "17: AL � NU ; 4: � ( SP | ZW ) ; 12: SP �"); // 17: AL � NU ; 4: � ( SP | ZW ) ; 12: SP �
			else if (cb == NU && ca == AL)
				no_break_pair(cb, ca, "17: NU � AL ; 4: � ( SP | ZW ) ; 12: SP �"); // 17: NU � AL ; 4: � ( SP | ZW ) ; 12: SP �
			else if (cb == CM && ca == NU)
				no_break_pair(cb, ca, "7c: CM->AL ; 17: CM � NU ; 4: � ( SP | ZW ) ; 12: SP �"); // 7c: CM->AL ; 17: CM � NU ; 4: � ( SP | ZW ) ; 12: SP �
			else if (cb == NU && ca == CM)
				no_break_pair(cb, ca, "7c: CM->AL ; 17: NU � CM ; 4: � ( SP | ZW ) ; 12: SP �"); // 7c: CM->AL ; 17: NU � CM ; 4: � ( SP | ZW ) ; 12: SP �
			// LB 18  Do not break between the following pairs of classes.
			else if((cb == CL || cb == NU) && ca == PO)
				no_break_pair(cb, ca, "18: ( CL | NU )� PO ; 4: � ( SP | ZW ) ; 12: SP �"); // 18:( CL | NU )� PO ; 4: � ( SP | ZW ) ; 12: SP �
			else if( (cb == HY || cb == IS || cb ==  NU || cb == SY ) && ca == NU)
				no_break_pair(cb, ca, "18: ( HY | IS | NU | SY )� NU ; 4: � ( SP | ZW ) ; 12: SP �"); // 18:( HY | IS | NU | SY )� NU ; 4: � ( SP | ZW ) ; 12: SP �
			else if (cb == PR && (ca == AL || ca == HY || ca == ID || ca == NU || ca == OP) )
				no_break_pair(cb, ca, "18: PR � ( AL | HY | ID | NU | OP ) ; 4: � ( SP | ZW ) ; 12: SP �"); // 18: PR � ( AL | HY | ID | NU | OP ) ; 4: � ( SP | ZW ) ; 12: SP �
			else if(cb == SY && ca == NU)
				no_break_pair(cb, ca, "18: SY � NU ; 4: � ( SP | ZW ) ; 12: SP �"); // 18; SY � NU ; 4: � ( SP | ZW ) ; 12: SP �
			else if (cb == PR && ca == CM)
				no_break_pair(cb, ca, "7c: CM -> AL ; 18: PR � AL ; 4: � ( SP | ZW ) ; 12: SP �"); // 7c: CM->AL ; 18: PR � CM ; 4: � ( SP | ZW ) ; 12: SP �
			// LB 18b Do not break a Korean syllable.
			else if (cb == JL  && (ca == JL || ca == JV || ca == H2 || ca == H3 ))
				no_break_pair(cb, ca, "18b: JL  � ( JL | JV | H2 | H3 ) ; 4: � ( SP | ZW ) ; 12: SP �"); // 18b: JL  � ( JL | JV | H2 | H3 ) ; 4: � ( SP | ZW ) ; 12: SP �
			else if ((cb == JV || cb == H2 ) && (ca == JV || ca == JT))
				no_break_pair(cb, ca, "18b: ( JV | H2 ) � ( JV | JT ) ; 4: � ( SP | ZW ) ; 12: SP �"); // 18b: ( JV | H2 ) � ( JV | JT ) ; 4: � ( SP | ZW ) ; 12: SP �
			else if ((cb == JT || cb == H3 ) && ca == JT)
				no_break_pair(cb, ca, "18b: ( JT | H3 ) � JT ; 4: � ( SP | ZW ) ; 12: SP �"); // 18b: ( JT | H3 ) � JT ; 4: � ( SP | ZW ) ; 12: SP �
			// LB 18c Treat a Korean Syllable Block the same as ID.
			else if ((cb == JL || cb == JV || cb == JT || cb == H2 || cb == H3 ) && (ca == IN || ca == PO))
				no_break_pair(cb, ca, "18c: ( JL | JV | JT | H2 | H3 ) � (IN | PO) ; 4: � ( SP | ZW ) ; 12: SP �"); // 18c: ( JL | JV | JT | H2 | H3 ) � (IN | PO) ; 4: � ( SP | ZW ) ; 12: SP �
			else if ((cb == PR) && (ca == JL || ca == JV || ca == JT || ca == H2 || ca == H3))
				no_break_pair(cb, ca, "18c: (PR � ( JL | JV | JT | H2 | H3 ) ; 4: � ( SP | ZW ) ; 12: SP �"); // 18c: (PR � ( JL | JV | JT | H2 | H3 ) ; 4: � ( SP | ZW ) ; 12: SP �
			// LB 19  Do not break between alphabetics (�at�).
			else if (cb == AL && ca == AL)
				no_break_pair(cb, ca, "19: AL � AL ; 4: � ( SP | ZW ) ; 12: SP �"); // 19: AL � AL ; 4: � ( SP | ZW ) ; 12: SP �
			else if ((cb == CM && ca == AL) || (cb == AL && ca == CM))
				no_break_pair(cb, ca, "7b CM -> AL && 19: AL * AL ; 4: � ( SP | ZW ) ; 12: SP �"); // 7b CM -> AL && 19: AL * AL ; 4: � ( SP | ZW ) ; 12: SP �
			// LB 19b  Do not break between numeric punctuation and alphabetics ("e.g.").
			else if (cb == IS && ca == AL)
				no_break_pair(cb, ca, "19b: IS � AL ; 4: � ( SP | ZW ) ; 12: SP �"); // 19b: IS � AL ; 4: � ( SP | ZW ) ; 12: SP �
			// LB 20  Break everywhere else.
			else
				break_pair(cb, ca, "20: ALL � ; � ALL"); // 20 ALL � ; � ALL
		}
		terminate_row();
	}
	terminate_table();
}
#endif // ifdef VERIFY_PAIR_TABLE

// Public entry point for the pair table check. Runs the verification (and
// HTML generation) only when VERIFY_PAIR_TABLE is defined; otherwise the
// function does nothing.
void verifyTable()
{
#ifdef VERIFY_PAIR_TABLE
	table_verify verifier;
	verifier.verifyTable();
#endif
}


//[EOF]


.