update v1.0.3.3
This commit is contained in:
61
vendor/neitanod/forceutf8/README.md
vendored
Normal file
61
vendor/neitanod/forceutf8/README.md
vendored
Normal file
@@ -0,0 +1,61 @@
|
||||
forceutf8
|
||||
=========
|
||||
|
||||
PHP Class Encoding featuring popular \ForceUTF8\Encoding::toUTF8() function --formerly known as forceUTF8()-- that fixes mixed encoded strings.
|
||||
|
||||
Description
|
||||
===========
|
||||
|
||||
If you apply the PHP function utf8_encode() to an already-UTF8 string it will return a garbled UTF8 string.
|
||||
|
||||
This class addresses this issue and provides a handy static function called \ForceUTF8\Encoding::toUTF8().
|
||||
|
||||
You don't need to know what the encoding of your strings is. It can be Latin1 (iso 8859-1), Windows-1252 or UTF8, or the string can have a mix of them. \ForceUTF8\Encoding::toUTF8() will convert everything to UTF8.
|
||||
|
||||
Sometimes you have to deal with services that are unreliable in terms of encoding, possibly mixing UTF8 and Latin1 in the same string.
|
||||
|
||||
Update:
|
||||
|
||||
I've included another function, \ForceUTF8\Encoding::fixUTF8(), which will fix the double (or multiple) encoded UTF8 string that looks garbled.
|
||||
|
||||
Usage:
|
||||
======
|
||||
|
||||
use \ForceUTF8\Encoding;
|
||||
|
||||
$utf8_string = Encoding::toUTF8($utf8_or_latin1_or_mixed_string);
|
||||
|
||||
$latin1_string = Encoding::toLatin1($utf8_or_latin1_or_mixed_string);
|
||||
|
||||
also:
|
||||
|
||||
$utf8_string = Encoding::fixUTF8($garbled_utf8_string);
|
||||
|
||||
Examples:
|
||||
|
||||
use \ForceUTF8\Encoding;
|
||||
|
||||
echo Encoding::fixUTF8("FÃÂ©dération Camerounaise de Football\n");
|
||||
echo Encoding::fixUTF8("FÃ©dÃ©ration Camerounaise de Football\n");
|
||||
echo Encoding::fixUTF8("FÃÂ©dÃÂ©ration Camerounaise de Football\n");
|
||||
echo Encoding::fixUTF8("FÃÂÂÂÂ©dÃÂÂÂÂ©ration Camerounaise de Football\n");
|
||||
|
||||
will output:
|
||||
|
||||
Fédération Camerounaise de Football
|
||||
Fédération Camerounaise de Football
|
||||
Fédération Camerounaise de Football
|
||||
Fédération Camerounaise de Football
|
||||
|
||||
Install via composer:
|
||||
=====================
|
||||
Edit your composer.json file to include the following:
|
||||
|
||||
```json
|
||||
{
|
||||
"require": {
|
||||
"neitanod/forceutf8": "dev-master"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
20
vendor/neitanod/forceutf8/composer.json
vendored
Normal file
20
vendor/neitanod/forceutf8/composer.json
vendored
Normal file
@@ -0,0 +1,20 @@
|
||||
{
|
||||
"name": "neitanod/forceutf8",
|
||||
"homepage": "https://github.com/neitanod/forceutf8",
|
||||
"type": "library",
|
||||
"description": "PHP Class Encoding featuring popular Encoding::toUTF8() function --formerly known as forceUTF8()-- that fixes mixed encoded strings.",
|
||||
"require": {
|
||||
"php": ">=5.3.0"
|
||||
},
|
||||
"authors": [
|
||||
{
|
||||
"name": "Sebastián Grignoli",
|
||||
"email": "grignoli@gmail.com"
|
||||
}
|
||||
],
|
||||
"autoload": {
|
||||
"psr-0": {
|
||||
"ForceUTF8\\": "src/"
|
||||
}
|
||||
}
|
||||
}
|
347
vendor/neitanod/forceutf8/src/ForceUTF8/Encoding.php
vendored
Normal file
347
vendor/neitanod/forceutf8/src/ForceUTF8/Encoding.php
vendored
Normal file
@@ -0,0 +1,347 @@
|
||||
<?php
|
||||
/*
|
||||
Copyright (c) 2008 Sebastián Grignoli
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
3. Neither the name of copyright holders nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL COPYRIGHT HOLDERS OR CONTRIBUTORS
|
||||
BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @author "Sebastián Grignoli" <grignoli@gmail.com>
|
||||
* @package Encoding
|
||||
* @version 2.0
|
||||
* @link https://github.com/neitanod/forceutf8
|
||||
* @example https://github.com/neitanod/forceutf8
|
||||
* @license Revised BSD
|
||||
*/
|
||||
|
||||
namespace ForceUTF8;
|
||||
|
||||
/**
 * Converts strings of unknown or mixed encoding (UTF-8, ISO 8859-1, Windows-1252)
 * to UTF-8 or back to Windows-1252, and repairs UTF-8 text that has been
 * encoded more than once.
 *
 * All public methods are static and accept either a single value or an array
 * (arrays are processed element by element, recursively, keys preserved).
 */
class Encoding {

    const ICONV_TRANSLIT = "TRANSLIT";
    const ICONV_IGNORE = "IGNORE";
    const WITHOUT_ICONV = "";

    /**
     * Windows-1252 byte value (0x80-0x9F) => UTF-8 sequence of the character
     * Windows-1252 assigns to it. Byte values missing from this table are
     * converted as plain ISO 8859-1 by toUTF8().
     */
    protected static $win1252ToUtf8 = array(
        128 => "\xe2\x82\xac",

        130 => "\xe2\x80\x9a",
        131 => "\xc6\x92",
        132 => "\xe2\x80\x9e",
        133 => "\xe2\x80\xa6",
        134 => "\xe2\x80\xa0",
        135 => "\xe2\x80\xa1",
        136 => "\xcb\x86",
        137 => "\xe2\x80\xb0",
        138 => "\xc5\xa0",
        139 => "\xe2\x80\xb9",
        140 => "\xc5\x92",

        142 => "\xc5\xbd",


        145 => "\xe2\x80\x98",
        146 => "\xe2\x80\x99",
        147 => "\xe2\x80\x9c",
        148 => "\xe2\x80\x9d",
        149 => "\xe2\x80\xa2",
        150 => "\xe2\x80\x93",
        151 => "\xe2\x80\x94",
        152 => "\xcb\x9c",
        153 => "\xe2\x84\xa2",
        154 => "\xc5\xa1",
        155 => "\xe2\x80\xba",
        156 => "\xc5\x93",

        158 => "\xc5\xbe",
        159 => "\xc5\xb8"
    );

    /**
     * UTF-8 encoding of code points U+0080-U+009F (what a Windows-1252 byte
     * becomes when it is encoded as if it were ISO 8859-1) => the UTF-8
     * sequence listed for the same byte in $win1252ToUtf8.
     */
    protected static $brokenUtf8ToUtf8 = array(
        "\xc2\x80" => "\xe2\x82\xac",

        "\xc2\x82" => "\xe2\x80\x9a",
        "\xc2\x83" => "\xc6\x92",
        "\xc2\x84" => "\xe2\x80\x9e",
        "\xc2\x85" => "\xe2\x80\xa6",
        "\xc2\x86" => "\xe2\x80\xa0",
        "\xc2\x87" => "\xe2\x80\xa1",
        "\xc2\x88" => "\xcb\x86",
        "\xc2\x89" => "\xe2\x80\xb0",
        "\xc2\x8a" => "\xc5\xa0",
        "\xc2\x8b" => "\xe2\x80\xb9",
        "\xc2\x8c" => "\xc5\x92",

        "\xc2\x8e" => "\xc5\xbd",


        "\xc2\x91" => "\xe2\x80\x98",
        "\xc2\x92" => "\xe2\x80\x99",
        "\xc2\x93" => "\xe2\x80\x9c",
        "\xc2\x94" => "\xe2\x80\x9d",
        "\xc2\x95" => "\xe2\x80\xa2",
        "\xc2\x96" => "\xe2\x80\x93",
        "\xc2\x97" => "\xe2\x80\x94",
        "\xc2\x98" => "\xcb\x9c",
        "\xc2\x99" => "\xe2\x84\xa2",
        "\xc2\x9a" => "\xc5\xa1",
        "\xc2\x9b" => "\xe2\x80\xba",
        "\xc2\x9c" => "\xc5\x93",

        "\xc2\x9e" => "\xc5\xbe",
        "\xc2\x9f" => "\xc5\xb8"
    );

    /**
     * Inverse of $win1252ToUtf8: UTF-8 sequence => single Windows-1252 byte.
     */
    protected static $utf8ToWin1252 = array(
        "\xe2\x82\xac" => "\x80",

        "\xe2\x80\x9a" => "\x82",
        "\xc6\x92"     => "\x83",
        "\xe2\x80\x9e" => "\x84",
        "\xe2\x80\xa6" => "\x85",
        "\xe2\x80\xa0" => "\x86",
        "\xe2\x80\xa1" => "\x87",
        "\xcb\x86"     => "\x88",
        "\xe2\x80\xb0" => "\x89",
        "\xc5\xa0"     => "\x8a",
        "\xe2\x80\xb9" => "\x8b",
        "\xc5\x92"     => "\x8c",

        "\xc5\xbd"     => "\x8e",


        "\xe2\x80\x98" => "\x91",
        "\xe2\x80\x99" => "\x92",
        "\xe2\x80\x9c" => "\x93",
        "\xe2\x80\x9d" => "\x94",
        "\xe2\x80\xa2" => "\x95",
        "\xe2\x80\x93" => "\x96",
        "\xe2\x80\x94" => "\x97",
        "\xcb\x9c"     => "\x98",
        "\xe2\x84\xa2" => "\x99",
        "\xc5\xa1"     => "\x9a",
        "\xe2\x80\xba" => "\x9b",
        "\xc5\x93"     => "\x9c",

        "\xc5\xbe"     => "\x9e",
        "\xc5\xb8"     => "\x9f"
    );

    /**
     * Function \ForceUTF8\Encoding::toUTF8
     *
     * This function leaves UTF8 characters alone, while converting almost all non-UTF8 to UTF8.
     *
     * It assumes that the encoding of the original string is either Windows-1252 or ISO 8859-1.
     *
     * It may fail to convert characters to UTF-8 if they fall into one of these scenarios:
     *
     * 1) when any of these characters:   ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞß
     *    are followed by any of these:  ("group B", i.e. any byte in 0x80-0xBF, for instance)
     *                                    ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿
     * For example:   %ABREPRESENT%C9%BB. «REPRESENTÉ»
     * The "«" (%AB) character will be converted, but the "É" followed by "»" (%C9%BB)
     * is also a valid unicode character, and will be left unchanged.
     *
     * 2) when any of these: àáâãäåæçèéêëìíîï  are followed by TWO chars from group B,
     * 3) when any of these: ðñòó  are followed by THREE chars from group B.
     *
     * Only the lead-byte / continuation-byte pattern is checked; sequences that
     * match the pattern are copied through without further validation.
     *
     * @name toUTF8
     * @param mixed $text A string, or an array of values to convert recursively.
     *                    Anything that is neither a string nor an array is returned untouched.
     * @return mixed The same string (or array), UTF8 encoded
     */
    public static function toUTF8($text)
    {
        if (is_array($text)) {
            foreach ($text as $k => $v) {
                $text[$k] = self::toUTF8($v);
            }
            return $text;
        }

        if (!is_string($text)) {
            return $text;
        }

        $max = self::strlen($text);
        $buf = "";

        for ($i = 0; $i < $max; $i++) {
            // Square-bracket offsets: the "{$i}" form is a parse error as of PHP 8.0.
            $c1 = $text[$i];
            $o1 = ord($c1);

            if ($o1 < 0x80) {
                // 7-bit ASCII: identical in every encoding handled here.
                $buf .= $c1;
                continue;
            }

            if ($o1 < 0xC0) {
                // 0x80-0xBF outside of a multi-byte sequence: a lone Windows-1252
                // or ISO 8859-1 byte that needs conversion.
                if (isset(self::$win1252ToUtf8[$o1])) {
                    $buf .= self::$win1252ToUtf8[$o1];
                } else {
                    $buf .= self::latin1ByteToUtf8($c1);
                }
                continue;
            }

            // 0xC0 and above: possibly the lead byte of a UTF-8 sequence.
            if ($o1 <= 0xDF) {
                $expected = 1;      // looks like 2 bytes UTF8
            } elseif ($o1 <= 0xEF) {
                $expected = 2;      // looks like 3 bytes UTF8
            } elseif ($o1 <= 0xF7) {
                $expected = 3;      // looks like 4 bytes UTF8
            } else {
                $expected = 0;      // 0xF8-0xFF: doesn't look like UTF8 at all
            }

            $tail = ($expected > 0)
                ? self::continuationBytes($text, $i + 1, $expected, $max)
                : null;

            if ($tail !== null) {
                // Almost sure it's UTF8 already: copy the whole sequence and skip it.
                $buf .= $c1 . $tail;
                $i += $expected;
            } else {
                // Not valid UTF8. Convert the single byte.
                $buf .= self::latin1ByteToUtf8($c1);
            }
        }

        return $buf;
    }

    /**
     * Converts UTF-8 text to Windows-1252.
     *
     * @param mixed  $text   A string, or an array converted recursively; other types are returned untouched.
     * @param string $option One of WITHOUT_ICONV, ICONV_TRANSLIT or ICONV_IGNORE (see utf8_decode()).
     * @return mixed
     */
    public static function toWin1252($text, $option = self::WITHOUT_ICONV)
    {
        if (is_array($text)) {
            foreach ($text as $k => $v) {
                $text[$k] = self::toWin1252($v, $option);
            }
            return $text;
        }

        if (is_string($text)) {
            return static::utf8_decode($text, $option);
        }

        return $text;
    }

    /**
     * Alias of toWin1252() using the default (non-iconv) conversion.
     *
     * @param mixed $text
     * @return mixed
     */
    public static function toISO8859($text)
    {
        return self::toWin1252($text);
    }

    /**
     * Alias of toWin1252() using the default (non-iconv) conversion.
     *
     * @param mixed $text
     * @return mixed
     */
    public static function toLatin1($text)
    {
        return self::toWin1252($text);
    }

    /**
     * Repairs UTF-8 text that was encoded to UTF-8 more than once.
     *
     * The text is repeatedly decoded to Windows-1252 and re-encoded with toUTF8()
     * until a round trip no longer changes it.
     *
     * @param mixed  $text   A string, or an array fixed recursively; other types are
     *                       returned untouched (consistent with toUTF8() and toWin1252()).
     * @param string $option One of WITHOUT_ICONV, ICONV_TRANSLIT or ICONV_IGNORE (see utf8_decode()).
     * @return mixed
     */
    public static function fixUTF8($text, $option = self::WITHOUT_ICONV)
    {
        if (is_array($text)) {
            foreach ($text as $k => $v) {
                $text[$k] = self::fixUTF8($v, $option);
            }
            return $text;
        }

        if (!is_string($text)) {
            return $text;
        }

        // Strict comparison: a loose one treats numeric-looking strings such as
        // "1e3" and "1000" as equal. Once the loop ends $text is a fixed point of
        // the round trip, so no further pass is required.
        $last = "";
        while ($last !== $text) {
            $last = $text;
            $text = self::toUTF8(static::utf8_decode($text, $option));
        }

        return $text;
    }

    /**
     * If you received an UTF-8 string that was converted from Windows-1252 as it was ISO8859-1
     * (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.
     * See: http://en.wikipedia.org/wiki/Windows-1252
     *
     * @param string|array $text Passed straight to str_replace() as the subject.
     * @return string|array
     */
    public static function UTF8FixWin1252Chars($text)
    {
        return str_replace(array_keys(self::$brokenUtf8ToUtf8), array_values(self::$brokenUtf8ToUtf8), $text);
    }

    /**
     * Strips a leading UTF-8 byte order mark (EF BB BF), if there is one.
     *
     * @param string $str
     * @return string
     */
    public static function removeBOM($str = "")
    {
        if (substr($str, 0, 3) === "\xef\xbb\xbf") {
            $str = substr($str, 3);
        }
        return $str;
    }

    /**
     * Byte length of $text, also when mbstring overloads strlen()
     * (bit 2 of the mbstring.func_overload ini setting).
     *
     * @param string $text
     * @return int
     */
    protected static function strlen($text)
    {
        $overloaded = function_exists('mb_strlen')
            && ((((int) ini_get('mbstring.func_overload')) & 2) !== 0);

        return $overloaded ? mb_strlen($text, '8bit') : strlen($text);
    }

    /**
     * Maps a free-form encoding label to 'ISO-8859-1' or 'UTF-8'.
     *
     * The label is upper-cased and everything except A-Z and 0-9 is dropped
     * (including whitespace, so "ISO 8859-1" and "Latin 1" are recognised)
     * before it is looked up. Unknown labels yield 'UTF-8'.
     *
     * @param string $encodingLabel
     * @return string 'ISO-8859-1' or 'UTF-8'
     */
    public static function normalizeEncoding($encodingLabel)
    {
        $encoding = preg_replace('/[^A-Z0-9]/', '', strtoupper($encodingLabel));

        $equivalences = array(
            'ISO88591'    => 'ISO-8859-1',
            'ISO8859'     => 'ISO-8859-1',
            'ISO'         => 'ISO-8859-1',
            'LATIN1'      => 'ISO-8859-1',
            'LATIN'       => 'ISO-8859-1',
            'UTF8'        => 'UTF-8',
            'UTF'         => 'UTF-8',
            'WIN1252'     => 'ISO-8859-1',
            'WINDOWS1252' => 'ISO-8859-1'
        );

        return isset($equivalences[$encoding]) ? $equivalences[$encoding] : 'UTF-8';
    }

    /**
     * Converts $text to the encoding named by $encodingLabel.
     *
     * @param string $encodingLabel Any label understood by normalizeEncoding().
     * @param mixed  $text
     * @return mixed Result of toLatin1() for ISO-8859-1 labels, of toUTF8() otherwise.
     */
    public static function encode($encodingLabel, $text)
    {
        if (self::normalizeEncoding($encodingLabel) === 'ISO-8859-1') {
            return self::toLatin1($text);
        }
        return self::toUTF8($text);
    }

    /**
     * Decodes UTF-8 text to Windows-1252.
     *
     * Without iconv (option WITHOUT_ICONV, or iconv not installed) the text is
     * first normalised with toUTF8(), the characters listed in $utf8ToWin1252
     * are replaced by their Windows-1252 bytes, and the rest is decoded with
     * PHP's utf8_decode().
     * NOTE(review): the global utf8_decode() is deprecated as of PHP 8.2; it is
     * kept here to preserve the exact existing behaviour.
     *
     * With iconv, $text is handed to iconv() as is, with "//TRANSLIT" or
     * "//IGNORE" appended to the target charset according to $option.
     *
     * @param string $text
     * @param string $option One of WITHOUT_ICONV, ICONV_TRANSLIT or ICONV_IGNORE.
     * @return string|false Whatever utf8_decode() / iconv() returned.
     */
    protected static function utf8_decode($text, $option)
    {
        if ($option == self::WITHOUT_ICONV || !function_exists('iconv')) {
            return utf8_decode(
                str_replace(array_keys(self::$utf8ToWin1252), array_values(self::$utf8ToWin1252), self::toUTF8($text))
            );
        }

        if ($option == self::ICONV_TRANSLIT) {
            $suffix = '//TRANSLIT';
        } elseif ($option == self::ICONV_IGNORE) {
            $suffix = '//IGNORE';
        } else {
            $suffix = '';
        }

        return iconv("UTF-8", "Windows-1252" . $suffix, $text);
    }

    /**
     * Encodes one byte in the range 0x80-0xFF as the two-byte UTF-8 sequence of
     * the ISO 8859-1 character with that value.
     *
     * Integer arithmetic is used on purpose: dividing ord() by 64 yields a
     * float, and passing a fractional float to chr() is deprecated in PHP 8.1.
     *
     * @param string $byte A single byte.
     * @return string Two bytes.
     */
    protected static function latin1ByteToUtf8($byte)
    {
        $value = ord($byte);
        return chr(0xC0 | ($value >> 6)) . chr(0x80 | ($value & 0x3F));
    }

    /**
     * Returns the $count bytes of $text starting at $start when all of them
     * exist and are UTF-8 continuation bytes (0x80-0xBF), or null otherwise.
     *
     * Bytes are read one by one instead of with substr(), which mbstring may overload.
     *
     * @param string $text
     * @param int    $start Offset of the first expected continuation byte.
     * @param int    $count Number of continuation bytes expected.
     * @param int    $max   Byte length of $text.
     * @return string|null
     */
    protected static function continuationBytes($text, $start, $count, $max)
    {
        if ($start + $count > $max) {
            return null;
        }

        $tail = "";
        for ($pos = $start; $pos < $start + $count; $pos++) {
            $value = ord($text[$pos]);
            if ($value < 0x80 || $value > 0xBF) {
                return null;
            }
            $tail .= $text[$pos];
        }

        return $tail;
    }
}
|
101
vendor/neitanod/forceutf8/test/ForceUTF8Test.php
vendored
Normal file
101
vendor/neitanod/forceutf8/test/ForceUTF8Test.php
vendored
Normal file
@@ -0,0 +1,101 @@
|
||||
<?php
|
||||
require_once(dirname(__FILE__)."/Test.class.php");
require_once(dirname(dirname(__FILE__))."/src/ForceUTF8/Encoding.php");

use \ForceUTF8\Encoding;

/**
 * Reads a fixture file from the data/ directory next to this script.
 *
 * Fails loudly when the file cannot be read: file_get_contents() would
 * otherwise return false on both sides of a comparison and let the tests
 * below pass without testing anything.
 *
 * @param string $name File name inside data/.
 * @return string Raw file contents.
 * @throws RuntimeException When the fixture cannot be read.
 */
function load_fixture($name){
  $path = dirname(__FILE__)."/data/".$name;
  $contents = file_get_contents($path);
  if($contents === false) {
    throw new RuntimeException("Unable to read test fixture: ".$path);
  }
  return $contents;
}

/**
 * Three fixtures in mixed encodings: Latin-1, UTF-8, Latin-1.
 *
 * @return array
 */
function mixed_encoding_fixtures(){
  return array(
    load_fixture("test1Latin.txt"),
    load_fixture("test1.txt"),
    load_fixture("test1Latin.txt"));
}

/**
 * The UTF-8 fixture three times: the expected result of converting
 * mixed_encoding_fixtures().
 *
 * @return array
 */
function utf8_fixtures(){
  return array(
    load_fixture("test1.txt"),
    load_fixture("test1.txt"),
    load_fixture("test1.txt"));
}

/**
 * mixed_encoding_fixtures() with every element passed through utf8_encode()
 * once more, i.e. encoded one time too many.
 * NOTE(review): utf8_encode() is deprecated as of PHP 8.2; it is kept because
 * it is the very function whose misuse these tests reproduce.
 *
 * @return array
 */
function double_encoded_fixtures(){
  return array_map('utf8_encode', mixed_encoding_fixtures());
}

// Test the testing class itself.
Test::is("'yes' is true", 'yes', true);
Test::not("1 is not false", 1, false);
Test::identical("true is identical to true", true, true);
Test::true("1 is true", 1);

// ForceUTF8 tests.
Test::not("Source files must not use the same encoding before conversion.",
  load_fixture("test1.txt"),
  load_fixture("test1Latin.txt"));

Test::identical("Simple Encoding works.",
  load_fixture("test1.txt"),
  Encoding::toUTF8(load_fixture("test1Latin.txt")));

function test_arrays_are_different(){
  return mixed_encoding_fixtures() != utf8_fixtures();
}

function test_encoding_of_arrays(){
  return Encoding::toUTF8(mixed_encoding_fixtures()) == utf8_fixtures();
}

Test::true("Source arrays are different.", test_arrays_are_different());
Test::true("Encoding of array works.", test_encoding_of_arrays());

Test::identical("fixUTF8() maintains UTF-8 string.",
  load_fixture("test1.txt"),
  Encoding::fixUTF8(load_fixture("test1.txt")));

Test::not("An UTF-8 double encoded string differs from a correct UTF-8 string.",
  load_fixture("test1.txt"),
  utf8_encode(load_fixture("test1.txt")));

Test::identical("fixUTF8() reverts to UTF-8 a double encoded string.",
  load_fixture("test1.txt"),
  Encoding::fixUTF8(utf8_encode(load_fixture("test1.txt"))));

function test_double_encoded_arrays_are_different(){
  return double_encoded_fixtures() != utf8_fixtures();
}

function test_double_encoded_arrays_fix(){
  return Encoding::fixUTF8(double_encoded_fixtures()) == utf8_fixtures();
}

Test::true("Source arrays are different (fixUTF8).", test_double_encoded_arrays_are_different());
Test::true("Fixing of double encoded array works.", test_double_encoded_arrays_fix());

// README examples. The inputs are spelled as byte escapes so that they stay
// garbled no matter how this file itself gets re-encoded; with plain text
// literals the input would be identical to the expected output and the tests
// would prove nothing.
$fixed_example = "F\xC3\xA9d\xC3\xA9ration Camerounaise de Football\n";
$garbled_examples = array(
  // 1: first "é" encoded twice, second one intact
  1 => "F\xC3\x83\xC2\xA9d\xC3\xA9ration Camerounaise de Football\n",
  // 2: both "é" encoded twice
  2 => "F\xC3\x83\xC2\xA9d\xC3\x83\xC2\xA9ration Camerounaise de Football\n",
  // 3: both "é" encoded three times
  3 => "F\xC3\x83\xC2\x83\xC3\x82\xC2\xA9d\xC3\x83\xC2\x83\xC3\x82\xC2\xA9ration Camerounaise de Football\n",
  // 4: both "é" encoded four times
  4 => "F\xC3\x83\xC2\x83\xC3\x82\xC2\x83\xC3\x83\xC2\x82\xC3\x82\xC2\xA9d\xC3\x83\xC2\x83\xC3\x82\xC2\x83\xC3\x83\xC2\x82\xC3\x82\xC2\xA9ration Camerounaise de Football\n"
);

foreach($garbled_examples as $number => $garbled){
  Test::identical("fixUTF8() Example ".$number." still working.",
    Encoding::fixUTF8($garbled),
    $fixed_example);
}

Test::totals();
|
62
vendor/neitanod/forceutf8/test/Test.class.php
vendored
Normal file
62
vendor/neitanod/forceutf8/test/Test.class.php
vendored
Normal file
@@ -0,0 +1,62 @@
|
||||
<?php
|
||||
/**
 * Minimal static test harness: every check prints a "." on success or a
 * "<name> -> FAILED" line on failure, and totals() prints both counters.
 */
class Test
{
    /** Number of checks that passed so far. */
    protected static $passed = 0;

    /** Number of checks that failed so far. */
    protected static $failed = 0;

    /** 'char' or 'line', depending on what character()/line() printed last. */
    protected static $last_echoed;

    /**
     * Passes when $result loosely equals TRUE.
     */
    public static function true($test_name, $result)
    {
        return static::is($test_name, $result, TRUE);
    }

    /**
     * Passes when $result == $expected (loose comparison).
     */
    public static function is($test_name, $result, $expected)
    {
        $matches = ($result == $expected);
        if (!$matches) {
            static::failed($test_name);
            return;
        }
        static::passed($test_name);
    }

    /**
     * Passes when $result != $expected (loose comparison).
     */
    public static function not($test_name, $result, $expected)
    {
        $differs = !($result == $expected);
        if (!$differs) {
            static::failed($test_name);
            return;
        }
        static::passed($test_name);
    }

    /**
     * Passes when $result === $expected (strict comparison).
     */
    public static function identical($test_name, $result, $expected)
    {
        $same = ($result === $expected);
        if (!$same) {
            static::failed($test_name);
            return;
        }
        static::passed($test_name);
    }

    /**
     * Prints a blank line followed by the passed and failed counters.
     */
    public static function totals()
    {
        echo "\n";
        echo static::$passed." tests passed.\n";
        echo static::$failed." tests failed.\n";
    }

    /**
     * Reports a failed check by name and counts it.
     */
    private static function failed($test_name)
    {
        echo "\n".$test_name." -> FAILED\n";
        static::$failed++;
    }

    /**
     * Prints a progress dot and counts the passed check.
     */
    private static function passed($test_name)
    {
        static::character(".");
        static::$passed++;
    }

    /**
     * Prints $char without a newline and remembers that a character was printed last.
     */
    private static function character($char)
    {
        echo $char;
        static::$last_echoed = 'char';
    }

    /**
     * Prints $msg on a line of its own, first ending a pending run of characters.
     */
    private static function line($msg)
    {
        if (static::$last_echoed == 'char') {
            echo "\n";
        }
        echo $msg."\n";
        static::$last_echoed = 'line';
    }
}
|
||||
|
1
vendor/neitanod/forceutf8/test/data/russian.txt
vendored
Normal file
1
vendor/neitanod/forceutf8/test/data/russian.txt
vendored
Normal file
@@ -0,0 +1 @@
|
||||
hello žš, привет
|
1
vendor/neitanod/forceutf8/test/data/test1.txt
vendored
Normal file
1
vendor/neitanod/forceutf8/test/data/test1.txt
vendored
Normal file
@@ -0,0 +1 @@
|
||||
Hírek
|
1
vendor/neitanod/forceutf8/test/data/test1Latin.txt
vendored
Normal file
1
vendor/neitanod/forceutf8/test/data/test1Latin.txt
vendored
Normal file
@@ -0,0 +1 @@
|
||||
H<EFBFBD>rek
|
Reference in New Issue
Block a user