# StripControlChars Global Filter Plugin
# version 0.3
# Copyright 2003,2004, Jacques Distler.
#
# usage: adds a global filter attribute to any MT variable substitution Tag,
#
#   <$MTEntryBody strip_controlchars="1"$>
#
#     strips the characters 0x00-0x08, 0x0B, 0x0C, 0x0E-0x1F and 0x80-0x9F.
#
#   <$MTEntryBody strip_controlchars="2"$>
#
#     converts the Windows-1252 characters 0x80 to 0x9F to the corresponding
#     Unicode numeric entities, while stripping out the rest (the control
#     characters listed above, 0x7F, and the five code points that
#     Windows-1252 leaves undefined).  A selection of Latin-1 characters
#     (0xA2-0xFF, see @latin1_codes below) is also rewritten as numeric
#     entities.  Hopefully, that was what was intended.
#
# Any other attribute value leaves the text untouched.

package MT::Plugin::StripControlChars;

use strict;
use warnings;

use MT::Template::Context;

# Register the filter.  A plain code reference hands MT's arguments
# (text, attribute value, context) straight to strip_controlchars().
MT::Template::Context->add_global_filter(
    strip_controlchars => \&strip_controlchars );

# Code points that mode 2 deletes outright: the C0 control characters
# (TAB, LF and CR are deliberately kept), DEL, and the five positions
# that Windows-1252 leaves undefined.
my @stripped_codes = (
    0x00 .. 0x08, 0x0B, 0x0C, 0x0E .. 0x1F,
    0x7F,
    0x81, 0x8D, 0x8F, 0x90, 0x9D,
);

# Windows-1252 code point => Unicode code point, for the 0x80-0x9F range.
my %unicode_for_cp1252 = (
    0x80 => 0x20AC,    # euro sign
    0x82 => 0x201A,    # single low-9 quotation mark
    0x83 => 0x0192,    # latin small letter f with hook
    0x84 => 0x201E,    # double low-9 quotation mark
    0x85 => 0x2026,    # horizontal ellipsis
    0x86 => 0x2020,    # dagger
    0x87 => 0x2021,    # double dagger
    0x88 => 0x02C6,    # modifier letter circumflex accent
    0x89 => 0x2030,    # per mille sign
    0x8A => 0x0160,    # latin capital letter S with caron
    0x8B => 0x2039,    # single left-pointing angle quotation mark
    0x8C => 0x0152,    # latin capital ligature OE
    0x8E => 0x017D,    # latin capital letter Z with caron
    0x91 => 0x2018,    # left single quotation mark
    0x92 => 0x2019,    # right single quotation mark
    0x93 => 0x201C,    # left double quotation mark
    0x94 => 0x201D,    # right double quotation mark
    0x95 => 0x2022,    # bullet
    0x96 => 0x2013,    # en dash
    0x97 => 0x2014,    # em dash
    0x98 => 0x02DC,    # small tilde
    0x99 => 0x2122,    # trade mark sign
    0x9A => 0x0161,    # latin small letter s with caron
    0x9B => 0x203A,    # single right-pointing angle quotation mark
    0x9C => 0x0153,    # latin small ligature oe
    0x9E => 0x017E,    # latin small letter z with caron
    0x9F => 0x0178,    # latin capital letter Y with diaeresis
);

# Latin-1 characters that mode 2 also rewrites as entities.  In this range
# the Windows-1252 and Unicode code points coincide.
# NOTE(review): the earlier table listed 0xA3/0xA4/0xA5 shifted by one
# (0xA3 => cent sign, 0xA4 => pound sign, 0xA5 given twice); this is read
# as an intended 0xA2-0xA5 run - confirm that 0xA2 should be included.
my @latin1_codes = (
    0xA2 .. 0xA5, 0xA9, 0xAE,
    0xB0 .. 0xB7, 0xBC .. 0xBE,
    0xE0 .. 0xFF,
);

# Character => replacement text.  Keys are the actual characters (not regex
# source) and every replacement is pure ASCII, so the result does not depend
# on the encoding this file happens to be saved in.
my %windows_1252 = (
    ( map { ( chr($_) => '' ) } @stripped_codes ),
    ( map { ( chr($_) => sprintf( '&#x%X;', $unicode_for_cp1252{$_} ) ) }
          keys %unicode_for_cp1252 ),
    ( map { ( chr($_) => sprintf( '&#x%X;', $_ ) ) } @latin1_codes ),
);

# One character class covering every key of %windows_1252, compiled once.
# Doing the whole conversion in a single pass guarantees that replacement
# text is never itself rescanned and rewritten by a later substitution.
my $convertible_re = do {
    my $class = join '',
        map { sprintf '\\x%02X', ord } sort keys %windows_1252;
    qr/([$class])/;
};

# strip_controlchars($text, $arg_value, $ctx)
#
# Global filter callback.
#   $text      - the tag output to filter
#   $arg_value - the attribute value: 1 strips control characters,
#                2 converts/strips according to %windows_1252
#   $ctx       - the third argument supplied by the caller (unused here)
#
# Returns the filtered text; for any other $arg_value the text is returned
# unchanged.
sub strip_controlchars {
    my ( $text, $arg_value, $ctx ) = @_;

    # Nothing to do for missing input; also keeps warnings quiet.
    return $text unless defined $text && defined $arg_value;

    # Template attributes arrive as strings.  Compare numerically, as
    # before, but do not warn when the value is not a number.
    my $mode = do { no warnings 'numeric'; $arg_value + 0 };

    if ( $mode == 1 ) {
        $text =~ s/[\x00-\x08\x0B\x0C\x0E-\x1F\x80-\x9F]//g;
    }
    elsif ( $mode == 2 ) {
        $text =~ s/$convertible_re/$windows_1252{$1}/g;
    }

    return $text;
}

1;