/******************************************
dosToTex - converts 8bit encodings to (La)TeX
          multibyte escape sequences.

usage:

dosToTex [ -1 | -u ] < inputfile.dos > outputfile.tex
dosToTex -h

Switches:
-1 : assume that the input file has been encoded in the cp1252
     (Windows) character set
-u : assume that the input file has been encoded in the "US ASCII"
     (EndNote terminology) character set
-h : print a help line
neither -1 nor -u: assume that the input file has been encoded in
     the ANSI (EndNote terminology) character set

Note that it does NOT convert line feeds etc as does dos2unix; therefore
piping in addition through dos2unix may be useful:

dosToTex [ -1 | -u ] < inputfile.dos | dos2unix > outputfile.tex
dosToTex [ -1 | -u ] < inputfile.dos | sed 's/\r//' > outputfile.tex

As a more versatile alternative, one might combine the converter 'recode' with
the use of
   \usepackage[latin1]{inputenc}
in the LaTeX source: http://recode.progiciels-bpi.ca/
                      http://directory.fsf.org/recode.html

See also
    http://www.cs.uu.nl/wais/html/na-dir/internationalization/font-faq.html
    http://wwwvms.mppmu.mpg.de/FAQ/iso-charset.faq
    http://budling.nytud.hu/~szigetva/etcetera/Hungarian/converters/dos2tex
    http://www.ctan.org/tex-archive/support/xtexshell/tfc.cc
    http://billposer.org/Software/uni2ascii.html

Richard J. Mathar, http://www.mpia.de/~mathar
Dec 07, 2015
*****************************************/
#include "config.h"

#ifdef HAVE_STDLIB_H
#include <stdlib.h>
#endif

#ifdef HAVE_STDIO_H
#include <stdio.h>
#endif

#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif

/*
 * Print the command line synopsis to the standard output.
 *
 * argv0: the name under which the program was invoked; it is inserted
 *        at the start of each of the four example invocations.
 */
void usage(char *argv0)
{
	/* One formatted write for the four invocation forms ... */
	printf("usage: %s ; # decode EndNote\n"
	       "\t %s -1 ; # decode the CP1252 Windows character set\n"
	       "\t %s -u ; # decode US ASCII\n"
	       "\t %s -h ; # help: print these usage lines here\n",
	       argv0, argv0, argv0, argv0) ;

	/* ... followed by the fixed trailer, which needs no formatting. */
	fputs("Reads from the standard input, writes to the standard output.\n", stdout) ;
}

/*
 * Look up the (La)TeX replacement for the input byte c.
 *
 * table: replacement strings for a contiguous range of byte values;
 *        a NULL entry means "no replacement, copy the byte unchanged".
 * first: the byte value that corresponds to table[0].
 * count: number of entries in table.
 * c:     the byte read from the input.
 *
 * Returns the replacement string, or NULL if c lies outside the range
 * first .. first+count-1 or the table has no replacement for it.
 */
static const char *tableLookup(const char *const *table, int first, size_t count, int c)
{
	if ( c < first || (size_t)(c - first) >= count )
		return NULL ;
	return table[c - first] ;
}

/*
 * Replacements for the isolated "US ASCII" codes that lie above the
 * contiguous usasc table (which covers 128..175 only).
 * Returns NULL for every other value.
 */
static const char *usExtra(int c)
{
	switch(c)
	{
	case 179 :
		return "$^3$" ;
	case 225 :
		return "\\ss " ;
	case 227 :
		return "\\P " ;
	case 230 :
		return "$\\mu$" ;
	case 241 :
		return "$\\pm$" ;
	case 246 :
		return "$\\div$" ;
	case 253 :
		return "$^2$" ;
	default :
		return NULL ;
	}
}

/*
 * Filter: copy the standard input to the standard output, replacing
 * 8-bit characters by (La)TeX escape sequences.
 *
 * Options:
 *   -1  input is cp1252 (takes precedence over -u if both are given)
 *   -u  input is "US ASCII" (EndNote terminology)
 *   -h  print the usage lines and exit
 *   (none) input is ANSI (EndNote terminology)
 *
 * Returns 0 on success, 1 if reading or writing failed.
 */
int main(int argc, char *argv[])
{
	int c ;
	/* Must be int: getopt() returns int, and on platforms where plain
	 * char is unsigned a char could never compare equal to -1, so the
	 * option loop would not terminate. */
	int oc ;
	int useendnote = 1 , /* according to page 108 of the EndNote 7 Manual, default */
	    useUs = 0 , /* US ASCII according to the table on p 109 of the EndNote 7 manual */
	    usecpc1252 = 0 ; /* according to http://czyborra.com/charsets/cp1252.gif */

	while (  (oc=getopt(argc,argv,"1hu")) != -1 )
	{
		switch(oc)
		{
		case '1' :
			usecpc1252 = 1 ;
			useendnote = 0 ;
			break ;
		case 'u' :
			useUs = 1 ;
			useendnote = 0 ;
			break ;
		case 'h' :
			usage(argv[0]) ;
			return 0 ;
		case '?' :
			/* oc is always '?' here, so printing it carries no
			 * information; getopt() has already reported the
			 * offending option on stderr. Processing continues
			 * as before. */
			fprintf(stderr,"Invalid command line option\n") ;
			usage(argv[0]) ;
			break ;
		default :
			break ;
		}
	}

	/* 128 entries: start at 0x80, end at 0xff, according to http://czyborra.com/charsets/cp1252.gif */
	static const char *const cpc1252[] = {
			NULL,
			NULL,
			",",
			"$f$",
			"``",
			"$\\ldots$",
			"\\dag ",
			"\\ddag ",
			"\\symbol{94}",
			NULL,
			"\\v{S}",
			"<",
			"{\\OE}",
			NULL,
			"\\v{Z}",
			NULL,
			NULL,
			"`",	/* 0x91 is the opening single quote */
			"'",
			"``",
			"''",
			NULL,
			"-",
			"--",
			"\\symbol{126}",
			NULL,
			"\\v{s}",
			">",
			"{\\oe}",
			NULL,
			"\\v{z}",
			"{\\\"Y}",
			NULL,
			"!`",	/* TeX ligature for the inverted exclamation mark */
			NULL,
			"\\pounds ",
			NULL,
			NULL,
			"$\\mid$",
			"\\S ",
			"\\symbol{127}",
			"\\copyright ",
			NULL,
			"$\\ll$",
			"$\\neg$",
			NULL,
			NULL,
			"\\symbol{22}",
			"$^0$",
			"$\\pm$",
			"$^2$",
			"$^3$",
			"'",
			"$\\mu$",
			"\\P ",
			"$\\cdot$",
			NULL,
			"$^1$",
			NULL,
			"$\\gg$",
			"1/4",
			"1/2",
			"3/4",
			"?`",	/* TeX ligature for the inverted question mark */
			"\\`A" ,
			"\\'A" ,
			"\\^A" ,
			"\\~A" ,
			"{\\\"A}" ,
			"{\\AA}" ,
			"{\\AE}" ,
			"\\c{C}" ,
			"\\`E" ,
			"\\'E" ,
			"\\^E" ,
			"{\\\"E}" ,
			"\\`I" ,
			"\\'I" ,
			"\\^I" ,
			"{\\\"I}" ,
			NULL,
			"\\~N" ,
			"\\`O" ,
			"\\'O" ,
			"\\^O" ,
			"\\~O" ,
			"{\\\"O}" ,
			"$\\times$" ,
			"{\\O}" ,
			"\\`U" ,
			"\\'U" ,
			"\\^U" ,
			"{\\\"U}" ,
			"\\'Y" ,
			NULL,
			"\\ss " ,
			"\\`a" ,
			"\\'a" ,
			"\\^a" ,
			"\\~a" ,
			"{\\\"a}" ,
			"{\\aa}" ,
			"{\\ae}" ,
			"\\c{c}" ,
			"\\`e" ,
			"\\'e" ,
			"\\^e" ,
			"{\\\"e}" ,
			"\\`{\\i}" ,
			"\\'{\\i}" ,
			"\\^{\\i}" ,
			"{\\\"{\\i}}" ,
			NULL,
			"\\~n" ,
			"\\`o" ,
			"\\'o" ,
			"\\^o" ,
			"\\~o" ,
			"{\\\"o}" ,
			"$\\div$" ,
			"{\\o}" ,
			"\\`u" ,
			"\\'u" ,
			"\\^u" ,
			"{\\\"u}" ,
			"\\'y" ,
			NULL,
			"{\\\"y}"
	} ;
	/* 127 entries: starts at 129 (NOT 128), ends at 255, according to
	 * page 108 of the EndNote 7 Manual */
	static const char *const endn[] = {
			"\\_",
			NULL,
			NULL,
			NULL,
			"$\\ldots$",
			"\\dag ",
			"\\ddag ",
			"\\symbol{94}",
			NULL,
			NULL,
			NULL,
			"{\\OE}",
			NULL,
			NULL,
			NULL,
			NULL,
			NULL,
			NULL,
			"``",
			"''",
			NULL,
			NULL,
			"--",
			"\\symbol{126}",
			NULL,
			NULL,
			NULL,
			"{\\oe}",
			NULL,
			NULL,
			"{\\\"Y}",
			NULL,
			"!`",
			NULL,
			"\\pounds ",
			NULL,
			NULL,
			"$\\mid$",
			"\\S ",
			"\\symbol{127}",
			"\\copyright ",
			NULL,
			"$\\ll$",
			"$\\neg$",
			NULL,
			NULL,
			"\\symbol{22}",
			"\\symbol{23}",
			"$\\pm$",
			"$^2$",
			NULL,
			"'",
			"$\\mu$",
			"\\P ",
			"$\\cdot$",
			NULL,
			"$^1$",
			"$^0$",
			"$\\gg$",
			"1/4","1/2","3/4",
			"?`",
			"\\`A",
			"\\'A",
			"\\^A",
			"\\~A",
			"{\\\"A}",
			"{\\AA}",
			"{\\AE}",
			"\\c{C}",
			"\\`E",
			"\\'E",
			"\\^E",
			"{\\\"E}",
			"\\`I",
			"\\'I",
			"\\^I",
			"{\\\"I}",
			NULL,
			"\\~N",
			"\\`O",
			"\\'O",
			"\\^O",
			"\\~O",
			"{\\\"O}",
			"$\\times$",
			"{\\O}",
			"\\`U",
			"\\'U",
			"\\^U",
			"{\\\"U}",
			"\\'Y",
			NULL,
			"\\ss ",
			"\\`a",
			"\\'a",
			"\\^a",
			"\\~a",
			"{\\\"a}",
			"{\\aa}",
			"{\\ae}",
			"\\c{c}",
			"\\`e",
			"\\'e",
			"\\^e",
			"{\\\"e}",
			"\\`{\\i}",
			"\\'{\\i}",
			"\\^{\\i}",
			"{\\\"{\\i}}",
			NULL,
			"\\~n",
			"\\`o",
			"\\'o",
			"\\^o",
			"\\~o",
			"{\\\"o}",
			"$\\div$",
			"{\\o}",
			"\\`u",
			"\\'u",
			"\\^u",
			"{\\\"u}",
			"\\'y",
			NULL,
			"{\\\"y}"
	} ;
	/* 48 entries: starts at 128, ends at 175; the few isolated codes
	 * above 175 are handled by usExtra() */
	static const char *const usasc[] = {
			"\\c{C}",
			"{\\\"u}",
			"\\'e",
			"\\^a",
			"{\\\"a}",
			"\\`a",
			"{\\aa}",
			"\\c{c}",
			"\\^e",
			"{\\\"e}",
			"\\`e",
			"{\\\"{\\i}}",
			"\\^{\\i}",
			"\\`{\\i}",
			"{\\\"A}",
			"{\\AA}",
			"\\'E",
			"{\\ae}",
			"{\\AE}",
			"\\^o",
			"{\\\"o}",
			"\\`o",
			"\\^u",
			"\\`u",
			"{\\\"y}",
			"{\\\"O}",
			"{\\\"U}",
			NULL,
			"\\pounds ",NULL,
			"P",
			"$f$",
			"\\'a",
			"\\'{\\i}",
			"\\'o",
			"\\'u",
			"\\~n",
			"\\~N",NULL,NULL,
			"?`",	/* TeX ligature for the inverted question mark */
			"\\_",
			"$\\neg$",
			"1/2",
			"1/4",
			"!`",	/* TeX ligature for the inverted exclamation mark */
			"$\\ll$",
			"$\\gg$"
	} ;

	while( (c=getchar()) != EOF)
	{
		const char *tex = NULL ;

		/* Same precedence as the option handling implies:
		 * cp1252 first, then the EndNote default, then US ASCII. */
		if ( usecpc1252)
			tex = tableLookup(cpc1252, 0x80, sizeof cpc1252 / sizeof cpc1252[0], c) ;
		else if ( useendnote)
			/* The table starts at 129; indexing it from 128 would
			 * shift every replacement by one and overrun the
			 * array for byte 255. */
			tex = tableLookup(endn, 129, sizeof endn / sizeof endn[0], c) ;
		else if ( useUs)
		{
			tex = tableLookup(usasc, 128, sizeof usasc / sizeof usasc[0], c) ;
			if ( tex == NULL)
				tex = usExtra(c) ;
		}

		if ( tex)
			fputs(tex, stdout) ;
		else
			putchar(c) ;	/* no replacement known: pass the byte through */
	}

	/* Report truncated input or output instead of exiting with success. */
	if ( ferror(stdin) || ferror(stdout) || fflush(stdout) == EOF )
	{
		fprintf(stderr,"%s: I/O error\n",argv[0]) ;
		return 1 ;
	}
	return 0 ;
}