SlideShare a Scribd company logo
First play and test

<?php

//note that this script file is UTF-8

//UTF-8 CLI assumed, else you'll need this:
//header("Content-Type: text/html; charset=UTF-8;");

//Formats one integer with NumberFormatter::DECIMAL for a handful of
//locales, to show how grouping separators and digit glyphs vary.

//'hi' is Hindi, 'fa' is Farsi, 'ar_EG' is Egyptian Arabic
$locales = array('en', 'en_US', 'fr_FR', 'de_DE', 'hi', 'fa', 'ar_EG');

$number = 1234567890;

foreach($locales as $locale)
{
     $formatter = new NumberFormatter($locale, NumberFormatter::DECIMAL);

     //one "locale:<TAB>formatted number" line per locale
     echo $locale . ":\t" . $formatter->format($number) . "\n";
}

//Output:
//en:            1,234,567,890
//en_US:    1,234,567,890
//fr_FR:    1 234 567 890
//de_DE:    1.234.567.890
//hi:            १,२३,४५,६७,८९०
//fa:            ۱٬۲۳۴٬۵۶۷٬۸۹۰
//ar_EG:    ١٬٢٣٤٬٥٦٧٬٨٩٠

?>

Sorting German

<?php

//note that this script file is UTF-8

//UTF-8 CLI assumed, else you'll need this:
//header("Content-Type: text/html; charset=UTF-8;");

//Sorts the same list of German surnames with core sort() under several
//setlocale() settings and then with Intl's Collator, printing each result
//so the orderings can be compared.

//some German surnames
$german_names = array('Weiß', 'Goldmann', 'Göbel', 'Weiss', 'Göthe',
     'Goethe', 'Götz');

sort($german_names);

//gives Array ( [0] => Goethe [1] => Goldmann [2] => Göbel [3] => Göthe
//[4] => Götz [5] => Weiss [6] => Weiß )
//which COINCIDENTALLY is the Austrian sort order
print_r($german_names);

sort($german_names, SORT_STRING); //default is SORT_REGULAR
print_r($german_names); //gives same as above

//BTW, you're not going to get far with setlocale() if you don't
//have that particular locale supported on your OS!
//on *nixes, something like:
//> locale --all-locales
//will give you a list of all installed locales
//
//you can give setlocale() a *list* of locales to try if
//you're not sure how your OS is spelling it etc
//
//ICU (and therefore Intl) uses its own locales and is not dependent
//on the operating system for the locale data

setlocale(LC_ALL, 'de_DE');
sort($german_names, SORT_LOCALE_STRING);
print_r($german_names);
//above gives Array ( [0] => Göbel [1] => Göthe [2] => Götz [3] => Goethe
//[4] => Goldmann [5] => Weiß [6] => Weiss )
//which isn't dictionary, phonebook or Austrian sort order :(
//[it seems to be "umlauted vowel comes before plain vowel, eszett
//comes before double ess" order]

setlocale(LC_ALL, 'de_AT');
sort($german_names, SORT_LOCALE_STRING);
print_r($german_names);
//above gives same as for de_DE - ie. nothing good :(

//this is curious...
setlocale(LC_ALL, 'de_DE.utf8');
sort($german_names, SORT_LOCALE_STRING);
print_r($german_names);
//above gives Array ( [0] => Göbel [1] => Goethe [2] => Goldmann
//[3] => Göthe [4] => Götz [5] => Weiss [6] => Weiß )
//which is our dictionary sort order!

//----Let's try using Intl--------

$coll = new Collator('de_DE');

$coll->sort($german_names);

//gives Array ( [0] => Göbel [1] => Goethe [2] => Goldmann [3] => Göthe
//[4] => Götz [5] => Weiss [6] => Weiß )
//which is our dictionary sort order!
print_r($german_names);

//Collator constructor can accept UCA keywords
//see http://userguide.icu-project.org/collation/architecture and
//http://userguide.icu-project.org/locale
$coll = new Collator('de@collation=phonebook');

$coll->sort($german_names);

//gives Array ( [0] => Göbel [1] => Goethe [2] => Göthe [3] => Götz
//[4] => Goldmann [5] => Weiss [6] => Weiß )
//which is our phonebook sort order!
print_r($german_names);

?>
Japanese era

<?php

//note that this script file is UTF-8

//UTF-8 CLI assumed, else you'll need this:
//header("Content-Type: text/html; charset=UTF-8;");

//Prints the current date/time in full format for each locale below,
//switching to the locale's traditional calendar when the locale string
//carries a calendar keyword.

//locale => timezone to display the time in
$timezones = array('en_GB' => 'Europe/London', 'ja_JP' => 'Asia/Tokyo',
     'ja_JP@calendar=japanese' => 'Asia/Tokyo');

$now = new DateTime();   //DateTime is a core PHP class as of version 5.2.0

foreach($timezones as $locale => $timezone)
{
     $calendar = IntlDateFormatter::GREGORIAN;

     if(strpos($locale, 'calendar=') !== false)
     {
          //slightly presumptuous as @calendar=gregorian also exists
          $calendar = IntlDateFormatter::TRADITIONAL;
     }

     $formatter = new IntlDateFormatter($locale, IntlDateFormatter::FULL,
          IntlDateFormatter::FULL, $timezone, $calendar);

     //one line of output per locale
     echo 'It is now: "' . $formatter->format($now) . '" in ' .
          "{$timezone}\n";
}

//Last line of output gives "平成 23 年" which is Heisei 23!

?>

Korean numbers

<?php

//note that this script file is UTF-8

//UTF-8 CLI assumed, else you'll need this:
//header("Content-Type: text/html; charset=UTF-8;");

//Tries NumberFormatter's SPELLOUT and CURRENCY styles with the ko_KR
//locale and prints the locale each formatter actually ended up using.

//See:
//http://en.wikipedia.org/wiki/Korean_numerals
//http://askakorean.blogspot.com/2010/03/korean-language-series-sino-korean.html

$number = 1234567890;

$formatter = new NumberFormatter('ko_KR', NumberFormatter::SPELLOUT);

echo "Korean spellout ({$formatter->getLocale()}):\t"
     . $formatter->format($number) . "\n";
//above gives [Korean spellout (en_GB): one thousand two hundred and
//thirty-four million, five hundred and sixty-seven thousand, eight hundred
//and ninety]
//ie. locale has fallen back to system default (in this case en_GB)

//but ko_KR is a valid ICU locale string, so let's check:
$formatter = new NumberFormatter('ko_KR', NumberFormatter::CURRENCY);

echo "Korean currency ({$formatter->getLocale()}):\t"
     . $formatter->format($number) . "\n";
//above gives [Korean currency (ko):    ₩1,234,567,890] which is correct

//ok, so it looks like we don't have the rules for Korean spellout
//we'll have to supply the NumberFormatter with our own ruleset.
//the technical details of the ruleset format are at
//http://userguide.icu-project.org/formatparse/numbers/rbnf-examples
//BUT we *do* have a ruleset for Japanese spellout which we can modify
//and use!
//(it's similar because it also counts in ten thousands and has
//non-Arabic numerals)
//the Japanese spellout ruleset is this (construct a NumberFormatter for
//Japanese spellout and then var_dump($formatter->getPattern())):
//
//
//pattern for japanese spellout
/*
 string(1520) "%financial:
    0: 零;
    1: 壱;
    2: 弐;
    3: 参;
    4: 四;
    5: 伍;
    6: 六;
    7: 七;
    8: 八;
    9: 九;
    10: 拾;
    11: 拾>%financial>;
    20: <%financial<拾;
    21: <%financial<拾>%financial>;
    100: <%financial<百;
    101: <%financial<百>%financial>;
    1000: <%financial<千;
    1001: <%financial<千>%financial>;
    10000: <%financial<萬;
    10001: <%financial<萬>%financial>;
    100000000: <%financial<億;
    100000001: <%financial<億>%financial>;
1000000000000: <%financial<兆;
     1000000000001: <%financial<兆>%financial>;
     10000000000000000: =#,##0=;
     -x: マイナス>%financial>;
    x.x: <%financial<点>%financial>;
%traditional:
     0: 〇;
     1: 一;
     2: 二;
     3: 三;
     4: 四;
     5: 五;
     6: 六;
     7: 七;
     8: 八;
     9: 九;
     10: 十;
     11: 十>%traditional>;
     20: <%traditional<十;
     21: <%traditional<十>%traditional>;
     100: 百;
     101: 百>%traditional>;
     200: <%traditional<百;
     201: <%traditional<百>%traditional>;
     1000: 千;
     1001: 千>%traditional>;
     2000: <%traditional<千;
     2001: <%traditional<千>%traditional>;
     10000: <%traditional<万;
     10001: <%traditional<万>%traditional>;
     100000000: <%traditional<億;
     100000001: <%traditional<億>%traditional>;
     1000000000000: <%traditional<兆;
     1000000000001: <%traditional<兆>%traditional>;
     10000000000000000: =#,##0=;
     -x: マイナス>%traditional>;
     x.x: <%traditional<・>%traditional>;
"
*/

//basically, for Japanese we count in groups of ten thousands
//and we have a traditional set of characters
//and a financial (anti-forgery) set of characters.
//
//Korean also counts in groups of ten thousands
//so let's modify this pattern for Korean
//(note that we'll use only South Korean Sino-Korean numbers)

//Rule-based pattern text with two named rule sets:
//  %hangul - numerals written in Hangul syllables
//  %hanja  - numerals written in Chinese characters
//Each rule is "base value: text;" and both sets have rules at 10000,
//100000000 and 1000000000000, i.e. they group by ten thousands.
//The string is passed verbatim to NumberFormatter, so its content
//(including whitespace) is part of the program - don't reformat it.
$korean_pattern = '%hangul:
     0: 영;    1: 일;   2: 이;   3: 삼;   4: 사;   5: 오;   6: 육;   7: 칠;   8: 팔;   9:
구;   10: 십;
     11: 십>%hangul>;
     20: <%hangul<십;
     21: <%hangul<십>%hangul>;
     100:    백;
     101:    백>%hangul>;
     200: <%hangul<백;
     201: <%hangul<백>%hangul>;
     1000:    천;
     1001:    천>%hangul>;
     2000: <%hangul<천;
     2001: <%hangul<천>%hangul>;
     10000: <%hangul<만;
     10001: <%hangul<만>%hangul>;
     100000000: <%hangul<억;
     100000001: <%hangul<억>%hangul>;
     1000000000000: <%hangul<조;
     1000000000001: <%hangul<조>%hangul>;
     10000000000000000: =#,##0=;
     -x: ->%hangul>;
     x.x: <%hangul<・>%hangul>;
%hanja:
     0: 〇;
     1: 一;
     2: 二;
     3: 三;
     4: 四;
     5: 五;
     6: 六;
     7: 七;
     8: 八;
     9: 九;
     10: 十;
     11: 十>%hanja>;
     20: <%hanja<十;
     21: <%hanja<十>%hanja>;
     100: 百;
     101: 百>%hanja>;
200: <%hanja<百;
     201: <%hanja<百>%hanja>;
     1000: 千;
     1001: 千>%hanja>;
     2000: <%hanja<千;
     2001: <%hanja<千>%hanja>;
     10000: <%hanja<萬;
     10001: <%hanja<萬>%hanja>;
     100000000: <%hanja<億;
     100000001: <%hanja<億>%hanja>;
     1000000000000: <%hanja<兆;
     1000000000001: <%hanja<兆>%hanja>;
     10000000000000000: =#,##0=;
     -x: ->%hanja>;
     x.x: <%hanja<・>%hanja>;';

//Build a rule-based formatter from our own Korean ruleset text
//($korean_pattern, defined earlier in this script).
$formatter = new NumberFormatter('ko_KR',
     NumberFormatter::PATTERN_RULEBASED, $korean_pattern);

//the pattern holds two rule sets; pick the Hangul one for output
$formatter->setTextAttribute(NumberFormatter::DEFAULT_RULESET, "%hangul");

//0..20, then the tens up to 100, then the larger unit boundaries
$numbers = array_merge(range(0, 20), range(30, 100, 10),
     array(1000, 10000, 100000000, 1000000000000));

foreach($numbers as $number)
{
     //one "number: spelled-out form" line per value
     echo "{$number}: {$formatter->format($number)}\n";
}

//outputs a correct list of Korean Hangul numbers

?>

strftime (not on presentation)

<?php

//note that this script file is UTF-8

//UTF-8 CLI assumed, else you'll need this:
//header("Content-Type: text/html; charset=UTF-8;");

//out of curiosity, let's see how PHP's core strftime() handles
//some different locale date formats
//(NOTE: strftime() is deprecated as of PHP 8.1 - it is used here
//only because comparing it with Intl is the point of this script)

//BTW, you're not going to get far with setlocale() if you don't
//have that particular locale supported on your OS!
//on *nixes, something like:
//> locale --all-locales
//will give you a list of all installed locales
//
//you can give setlocale() a *list* of locales to try if
//you're not sure how your OS is spelling it etc
//
//ICU (and therefore Intl) uses its own locales and is not dependent
//on the operating system for the locale data

//each entry is a list of spellings to try for one locale
$locales = array(array('fi_FI.utf8', 'fi_FI'), array('ja_JP.utf8', 'ja_JP'),
     array('fr_FR.utf8', 'fr_FR'));

$format = "%A %d %B %Y"; //ie. "Wednesday 17 August 2011"

foreach($locales as $locale_array)
{
     //setlocale() returns the spelling it accepted, or false if none did
     $locale = setlocale(LC_TIME, $locale_array);

     if($locale === false)
     {
          //don't print a date under a stale locale and mislabel it
          echo implode('/', $locale_array) . ":\t(locale not installed)\n";
          continue;
     }

     echo "{$locale}:\t" . strftime($format) . "\n";
}

//Output:
//fi_FI.utf8:   keskiviikko 17 elokuu 2011
//ja_JP.utf8:   水曜日 17 8 月 2011
//fr_FR.utf8:   mercredi 17 août 2011

//Comment:
//Not bad at all. The Japanese date won't be very natural-looking for a
//native Japanese speaker as the day and year aren't quantified with the
//appropriate character.
//Also seem to be some implementation issues
//(http://uk.php.net/manual/en/function.strftime.php)
//Intl extension is giving us more power and flexibility

?>

Japanese financial numbers (not on presentation)

<?php

//note that this script file is UTF-8

//UTF-8 CLI assumed, else you'll need this:
//header("Content-Type: text/html; charset=UTF-8;");

//Spells out one number in Japanese, first with the plain ja_JP locale
//and then with a numbers= keyword on the locale, to see which kanji
//set each one produces.

//SUPPORTED LOCALES FOR "SPELLOUT" IS A LOT MORE LIMITED
//THAN FOR DECIMAL OR CURRENCY ETC...
//
//from ICU website:
//"ICU provides number spellout rules for several locales,
//but not for all of the locales that ICU supports, and not all of the
//predefined rule types.
//Also, as of release 2.6, some of the provided rules are known to be
//incomplete."

$number = 1234567890;

$formatter = new NumberFormatter('ja_JP', NumberFormatter::SPELLOUT);

echo "Default Japanese spellout:\t" . $formatter->format($number) . "\n";
//above gives [十二億三千四百五十六万七千八百九十] - the usual kanji numbers

//see "Key/Type Definitions" at http://www.unicode.org/reports/tr35
$formatter = new NumberFormatter('ja_JP@numbers=jpanfin',
     NumberFormatter::SPELLOUT);

echo "Modified locale spellout:\t" . $formatter->format($number) . "\n";
//above also gives [十二億三千四百五十六万七千八百九十] - not our financial
//kanji numbers!

//Hmmmm, but if we now var_dump($formatter->getPattern()) we get:

//pattern for japanese spellout
//(interestingly, financial kanji here and at
//http://www.sljfaq.org/afaq/banknote-numbers.html differ)
/*
 string(1520) "%financial:
    0: 零;
    1: 壱;
    2: 弐;
    3: 参;
    4: 四;
    5: 伍;
    6: 六;
    7: 七;
    8: 八;
    9: 九;
    10: 拾;
    11: 拾>%financial>;
    20: <%financial<拾;
    21: <%financial<拾>%financial>;
    100: <%financial<百;
    101: <%financial<百>%financial>;
    1000: <%financial<千;
    1001: <%financial<千>%financial>;
    10000: <%financial<萬;
    10001: <%financial<萬>%financial>;
    100000000: <%financial<億;
    100000001: <%financial<億>%financial>;
    1000000000000: <%financial<兆;
    1000000000001: <%financial<兆>%financial>;
    10000000000000000: =#,##0=;
    -x: マイナス>%financial>;
    x.x: <%financial<点>%financial>;
%traditional:
    0: 〇;
1: 一;
     2: 二;
     3: 三;
     4: 四;
     5: 五;
     6: 六;
     7: 七;
     8: 八;
     9: 九;
     10: 十;
     11: 十>%traditional>;
     20: <%traditional<十;
     21: <%traditional<十>%traditional>;
     100: 百;
     101: 百>%traditional>;
     200: <%traditional<百;
     201: <%traditional<百>%traditional>;
     1000: 千;
     1001: 千>%traditional>;
     2000: <%traditional<千;
     2001: <%traditional<千>%traditional>;
     10000: <%traditional<万;
     10001: <%traditional<万>%traditional>;
     100000000: <%traditional<億;
     100000001: <%traditional<億>%traditional>;
     1000000000000: <%traditional<兆;
     1000000000001: <%traditional<兆>%traditional>;
     10000000000000000: =#,##0=;
     -x: マイナス>%traditional>;
     x.x: <%traditional<・>%traditional>;
"
*/

//so the financial kanji are in there but how to wrangle them out??
//Answer: select the rule set by name with setTextAttribute().

$formatter = new NumberFormatter('ja_JP', NumberFormatter::SPELLOUT);

$formatter->setTextAttribute(NumberFormatter::DEFAULT_RULESET,
     "%financial");

echo "setTextAttribute spellout:\t" . $formatter->format($number) . "\n";
//above gives [拾弐億参千四百伍拾六萬七千八百九拾] - bingo!

//now, out of curiosity

//same formatter as above
$formatter->setTextAttribute(NumberFormatter::DEFAULT_RULESET,
     "%traditional");

echo "setTextAttribute spellout:\t" . $formatter->format($number) . "\n";
//yes, this gives [十二億三千四百五十六万七千八百九十]

//notice that the %traditional and %financial patterns
//differ in more than just the characters used
//(for example, look at each format for the value of 100).
//let's take a look

$numbers = array(100, 199, 200, 201, 1000, 1999, 2000, 2001);

//left at its default rule set (the "traditional" output shown above)
$traditional_formatter = new NumberFormatter('ja_JP',
     NumberFormatter::SPELLOUT);

$financial_formatter = new NumberFormatter('ja_JP',
     NumberFormatter::SPELLOUT);
$financial_formatter->setTextAttribute(NumberFormatter::DEFAULT_RULESET,
     "%financial");

foreach($numbers as $number)
{
     //print both spellings of each value, then a separator line
     echo "{$number} as traditional:\t" .
          $traditional_formatter->format($number) . "\n";
     echo "{$number} as financial:\t" .
          $financial_formatter->format($number) . "\n";
     echo "----------------\n";
}

//outputs:
/*
100 as traditional:     百
100 as financial:       壱百
----------------
199 as traditional:     百九十九
199 as financial:       壱百九拾九
----------------
200 as traditional:     二百
200 as financial:       弐百
----------------
201 as traditional:     二百一
201 as financial:       弐百壱
----------------
1000 as traditional:    千
1000 as financial:      壱千
----------------
1999 as traditional:    千九百九十九
1999 as financial:      壱千九百九拾九
----------------
2000 as traditional:    二千
2000 as financial:      弐千
----------------
2001 as traditional:    二千一
2001 as financial:       弐千壱
----------------
*/
//We see that the different rules enable the financial spellout to write,
//say, "one thousand" instead of the traditional "thousand". This clearly
//makes sense in an anti-forgery context.

//User exercise: compare and contrast with PHP's core localeconv()

?>

Locale::acceptFromHttp (not on presentation)

<?php

//note that this script file is UTF-8

//We can set Intl's locale based on the browser's HTTP_ACCEPT_LANGUAGE
//header.
//Browsers send this header based on their "preferred language" setting.
//Only power users would tinker with this setting directly, but we can
//assume that it is *usually* correct.
//Google sites are quite good at using this header, try changing your
//browser's preferred language setting and then visit your favourite
//Google site!

header("Content-Type: text/html; charset=UTF-8;");

//the header is missing on CLI runs and for clients that don't send it
$accept_language = isset($_SERVER['HTTP_ACCEPT_LANGUAGE'])
     ? $_SERVER['HTTP_ACCEPT_LANGUAGE'] : '';

//request headers are client-controlled: escape before echoing into HTML
echo 'Browser\'s Accept-Language header: ' .
     htmlspecialchars($accept_language, ENT_QUOTES, 'UTF-8') . '<br>';

$browser_locale = Locale::acceptFromHttp($accept_language);

if($browser_locale === false)
{
     //no usable match in the header - keep whatever Intl already uses
     $browser_locale = Locale::getDefault();
}

echo 'Decided browser locale: ' . $browser_locale . '<br>';

Locale::setDefault($browser_locale);
echo 'Intl default locale now: ' . Locale::getDefault() . '<br>';    //a check

//The remaining calls break the chosen locale into its parts and
//print each one.

$all_variants = Locale::getAllVariants(Locale::getDefault());
echo 'All variants: ';
print_r($all_variants);
echo '<br>';

$language_name = Locale::getDisplayLanguage(Locale::getDefault());
echo 'Language display name: ' . $language_name . '<br>';

$region_name = Locale::getDisplayRegion(Locale::getDefault());
echo 'Region display name: ' . $region_name . '<br>';

$script_name = Locale::getDisplayScript(Locale::getDefault());
echo 'Script display name: ' . $script_name . '<br>';

$variant_name = Locale::getDisplayVariant(Locale::getDefault());
echo 'Variant display name: ' . $variant_name . '<br>';

$keywords = Locale::getKeywords(Locale::getDefault());
echo 'Keywords: ';
print_r($keywords);
echo '<br>';

?>

More Related Content

What's hot (20)

PDF
Learning Perl 6
brian d foy
 
PDF
Acções professores outubro novembro 2012
João Lima
 
PDF
Rothke Press
Ben Rothke
 
PDF
6thoralmucosaldiseases 2010
LE HAI TRIEU
 
PDF
Part 1
acearmin
 
PDF
Text in search queries with examples in Perl 6
Andrew Shitov
 
TXT
Bouncingballs sh
Ben Pope
 
PDF
Arabic uae e_services_user_manual
Confidential
 
PDF
Rate3
satapisat
 
PDF
Rate4
satapisat
 
PPT
PHP and MySQL
Sanketkumar Biswas
 
PDF
Advanced modulinos trial
brian d foy
 
PDF
EB-85 A
esthershiang88
 
PDF
Linux shell script-1
兎 伊藤
 
PDF
Jornada Passivhaus 14 junio Wolfgang Berger y Eduardo Arias
baupassivhaus
 
PDF
Morley Residence brochure
Eggleston Farkas Architects, ps
 
PDF
Dip Your Toes in the Sea of Security (PHP South Africa 2017)
James Titcumb
 
PDF
Data Types Master
Paolo Marcatili
 
Learning Perl 6
brian d foy
 
Acções professores outubro novembro 2012
João Lima
 
Rothke Press
Ben Rothke
 
6thoralmucosaldiseases 2010
LE HAI TRIEU
 
Part 1
acearmin
 
Text in search queries with examples in Perl 6
Andrew Shitov
 
Bouncingballs sh
Ben Pope
 
Arabic uae e_services_user_manual
Confidential
 
Rate3
satapisat
 
Rate4
satapisat
 
PHP and MySQL
Sanketkumar Biswas
 
Advanced modulinos trial
brian d foy
 
Linux shell script-1
兎 伊藤
 
Jornada Passivhaus 14 junio Wolfgang Berger y Eduardo Arias
baupassivhaus
 
Morley Residence brochure
Eggleston Farkas Architects, ps
 
Dip Your Toes in the Sea of Security (PHP South Africa 2017)
James Titcumb
 
Data Types Master
Paolo Marcatili
 

Similar to "Internationalisation with PHP and Intl" source code (20)

PDF
R57php 1231677414471772-2
ady36
 
PDF
PHP and Rich Internet Applications
elliando dias
 
PDF
R57shell
ady36
 
PPT
I18n with PHP 5.3
ZendCon
 
PDF
Helvetia
ESUG
 
PPTX
php programming.pptx
rani marri
 
ODP
PHP Tips for certification - OdW13
julien pauli
 
PDF
OWASP Top 10 - DrupalCon Amsterdam 2019
Ayesh Karunaratne
 
PDF
PHP and Rich Internet Applications
elliando dias
 
PPT
PHP variables
Siddique Ibrahim
 
PPTX
Php functions
JIGAR MAKHIJA
 
PDF
[PL] Jak nie zostać "programistą" PHP?
Radek Benkel
 
PPTX
London XQuery Meetup: Querying the World (Web Scraping)
Dennis Knochenwefel
 
PDF
PHP for Grown-ups
Manuel Lemos
 
PPTX
Workshop 1: Good practices in JavaScript
Visual Engineering
 
PDF
LAMP_TRAINING_SESSION_6
umapst
 
PDF
Dirty Secrets of the PHP SOAP Extension
Adam Trachtenberg
 
TXT
Nop2
guestea6d59
 
PDF
2014 database - course 2 - php
Hung-yu Lin
 
R57php 1231677414471772-2
ady36
 
PHP and Rich Internet Applications
elliando dias
 
R57shell
ady36
 
I18n with PHP 5.3
ZendCon
 
Helvetia
ESUG
 
php programming.pptx
rani marri
 
PHP Tips for certification - OdW13
julien pauli
 
OWASP Top 10 - DrupalCon Amsterdam 2019
Ayesh Karunaratne
 
PHP and Rich Internet Applications
elliando dias
 
PHP variables
Siddique Ibrahim
 
Php functions
JIGAR MAKHIJA
 
[PL] Jak nie zostać "programistą" PHP?
Radek Benkel
 
London XQuery Meetup: Querying the World (Web Scraping)
Dennis Knochenwefel
 
PHP for Grown-ups
Manuel Lemos
 
Workshop 1: Good practices in JavaScript
Visual Engineering
 
LAMP_TRAINING_SESSION_6
umapst
 
Dirty Secrets of the PHP SOAP Extension
Adam Trachtenberg
 
2014 database - course 2 - php
Hung-yu Lin
 
Ad

More from Daniel_Rhodes (9)

PDF
PhoneGap by Dissection
Daniel_Rhodes
 
ODP
Hyperlocalisation or "localising everything"
Daniel_Rhodes
 
ODP
PHP floating point precision
Daniel_Rhodes
 
PPT
Creating a constructive comment culture
Daniel_Rhodes
 
ODP
Internationalisation with PHP and Intl
Daniel_Rhodes
 
ODP
Character sets and iconv
Daniel_Rhodes
 
PDF
"Character sets and iconv" PHP source code
Daniel_Rhodes
 
ODP
Handling multibyte CSV files in PHP
Daniel_Rhodes
 
ODP
Multibyte string handling in PHP
Daniel_Rhodes
 
PhoneGap by Dissection
Daniel_Rhodes
 
Hyperlocalisation or "localising everything"
Daniel_Rhodes
 
PHP floating point precision
Daniel_Rhodes
 
Creating a constructive comment culture
Daniel_Rhodes
 
Internationalisation with PHP and Intl
Daniel_Rhodes
 
Character sets and iconv
Daniel_Rhodes
 
"Character sets and iconv" PHP source code
Daniel_Rhodes
 
Handling multibyte CSV files in PHP
Daniel_Rhodes
 
Multibyte string handling in PHP
Daniel_Rhodes
 
Ad

Recently uploaded (20)

PDF
CIFDAQ's Market Wrap : Bears Back in Control?
CIFDAQ
 
PDF
OFFOFFBOX™ – A New Era for African Film | Startup Presentation
ambaicciwalkerbrian
 
PPTX
Dev Dives: Automate, test, and deploy in one place—with Unified Developer Exp...
AndreeaTom
 
PDF
Economic Impact of Data Centres to the Malaysian Economy
flintglobalapac
 
PDF
AI Unleashed - Shaping the Future -Starting Today - AIOUG Yatra 2025 - For Co...
Sandesh Rao
 
PPTX
Agentic AI in Healthcare Driving the Next Wave of Digital Transformation
danielle hunter
 
PDF
NewMind AI Weekly Chronicles – July’25, Week III
NewMind AI
 
PDF
introduction to computer hardware and sofeware
chauhanshraddha2007
 
PDF
Data_Analytics_vs_Data_Science_vs_BI_by_CA_Suvidha_Chaplot.pdf
CA Suvidha Chaplot
 
PDF
RAT Builders - How to Catch Them All [DeepSec 2024]
malmoeb
 
PDF
Generative AI vs Predictive AI-The Ultimate Comparison Guide
Lily Clark
 
PPTX
Introduction to Flutter by Ayush Desai.pptx
ayushdesai204
 
PPTX
The Future of AI & Machine Learning.pptx
pritsen4700
 
PPTX
Agile Chennai 18-19 July 2025 Ideathon | AI Powered Microfinance Literacy Gui...
AgileNetwork
 
PDF
How Open Source Changed My Career by abdelrahman ismail
a0m0rajab1
 
PPTX
Agile Chennai 18-19 July 2025 | Workshop - Enhancing Agile Collaboration with...
AgileNetwork
 
PDF
MASTERDECK GRAPHSUMMIT SYDNEY (Public).pdf
Neo4j
 
PDF
Peak of Data & AI Encore - Real-Time Insights & Scalable Editing with ArcGIS
Safe Software
 
PDF
State-Dependent Conformal Perception Bounds for Neuro-Symbolic Verification
Ivan Ruchkin
 
PPTX
AI and Robotics for Human Well-being.pptx
JAYMIN SUTHAR
 
CIFDAQ's Market Wrap : Bears Back in Control?
CIFDAQ
 
OFFOFFBOX™ – A New Era for African Film | Startup Presentation
ambaicciwalkerbrian
 
Dev Dives: Automate, test, and deploy in one place—with Unified Developer Exp...
AndreeaTom
 
Economic Impact of Data Centres to the Malaysian Economy
flintglobalapac
 
AI Unleashed - Shaping the Future -Starting Today - AIOUG Yatra 2025 - For Co...
Sandesh Rao
 
Agentic AI in Healthcare Driving the Next Wave of Digital Transformation
danielle hunter
 
NewMind AI Weekly Chronicles – July’25, Week III
NewMind AI
 
introduction to computer hardware and sofeware
chauhanshraddha2007
 
Data_Analytics_vs_Data_Science_vs_BI_by_CA_Suvidha_Chaplot.pdf
CA Suvidha Chaplot
 
RAT Builders - How to Catch Them All [DeepSec 2024]
malmoeb
 
Generative AI vs Predictive AI-The Ultimate Comparison Guide
Lily Clark
 
Introduction to Flutter by Ayush Desai.pptx
ayushdesai204
 
The Future of AI & Machine Learning.pptx
pritsen4700
 
Agile Chennai 18-19 July 2025 Ideathon | AI Powered Microfinance Literacy Gui...
AgileNetwork
 
How Open Source Changed My Career by abdelrahman ismail
a0m0rajab1
 
Agile Chennai 18-19 July 2025 | Workshop - Enhancing Agile Collaboration with...
AgileNetwork
 
MASTERDECK GRAPHSUMMIT SYDNEY (Public).pdf
Neo4j
 
Peak of Data & AI Encore - Real-Time Insights & Scalable Editing with ArcGIS
Safe Software
 
State-Dependent Conformal Perception Bounds for Neuro-Symbolic Verification
Ivan Ruchkin
 
AI and Robotics for Human Well-being.pptx
JAYMIN SUTHAR
 

"Internationalisation with PHP and Intl" source code

  • 1. First play and test <?php //note that this script file is UTF-8 //UTF-8 CLI assumed, else you'll need this: //header("Content-Type: text/html; charset=UTF-8;"); //'hi' is Hindi, 'fa' is Farsi, 'ar_EG' is Egyptian Arabic $locales = array('en', 'en_US', 'fr_FR', 'de_DE', 'hi', 'fa', 'ar_EG'); $number = 1234567890; foreach($locales as $locale) { $formatter = new NumberFormatter($locale, NumberFormatter::DECIMAL); echo $locale . ":t" . $formatter->format($number) . "n"; } //Output: //en: 1,234,567,890 //en_US: 1,234,567,890 //fr_FR: 1 234 567 890 //de_DE: 1.234.567.890 //hi: १,२३,४५,६७,८९० //fa: ۱٬۲۳۴٬۵۶۷٬۸۹۰ //ar_EG: ١٬٢٣٤٬٥٦٧٬٨٩٠ ?> Sorting German <?php //note that this script file is UTF-8 //UTF-8 CLI assumed, else you'll need this: //header("Content-Type: text/html; charset=UTF-8;"); //some German surnames $german_names = array('Weiß', 'Goldmann', 'Göbel', 'Weiss', 'Göthe', 'Goethe', 'Götz'); sort($german_names); //gives Array ( [0] => Goethe [1] => Goldmann [2] => Göbel [3] => Göthe //[4] => Götz [5] => Weiss [6] => Weiß ) //which COINCIDENTALLY is the Austrian sort order print_r($german_names); sort($german_names, SORT_STRING); //default is SORT_REGULAR print_r($german_names); //gives same as above //BTW, you're not going to get far with setlocale() if you don't //have that particular locale supported on your OS! //on *nixes, something like:
  • 2. //> locale --all-locales //will give you a list of all installed locales // //you can give setlocale() a *list* of locales to try if //you're not sure how your OS is spelling it etc // //ICU (and therefore Intl) uses its own locales and is not dependent //on the operating system for the locale data setlocale(LC_ALL, 'de_DE'); sort($german_names, SORT_LOCALE_STRING); print_r($german_names); //above gives Array ( [0] => Göbel [1] => Göthe [2] => Götz [3] => Goethe //[4] => Goldmann [5] => Weiß [6] => Weiss ) //which isn't dictionary, phonebook or Austrian sort order :( //[it seems to be "umlauted vowel comes before plain vowel, eszett //comes before double ess" order] setlocale(LC_ALL, 'de_AT'); sort($german_names, SORT_LOCALE_STRING); print_r($german_names); //above gives same as for de_DE - ie. nothing good :( //this is curious... setlocale(LC_ALL, 'de_DE.utf8'); sort($german_names, SORT_LOCALE_STRING); print_r($german_names); //above gives Array ( [0] => Göbel [1] => Goethe [2] => Goldmann //[3] => Göthe [4] => Götz [5] => Weiss [6] => Weiß ) //which is our dictionary sort order! //----Let's try using Intl-------- $coll = new Collator('de_DE'); $coll->sort($german_names); //gives Array ( [0] => Göbel [1] => Goethe [2] => Goldmann [3] => Göthe //[4] => Götz [5] => Weiss [6] => Weiß ) //which is our dictionary sort order! print_r($german_names); //Collator constructor can accept UCA keywords $coll = new Collator('de@collation=phonebook'); //see http://userguide.icu-project.org/collation/architecture and http://userguide.icu-project.org/locale $coll->sort($german_names); //gives Array ( [0] => Göbel [1] => Goethe [2] => Göthe [3] => Götz //[4] => Goldmann [5] => Weiss [6] => Weiß ) //which is our phonebook sort order! print_r($german_names); ?>
  • 3. Japanese era <?php //note that this script file is UTF-8 //UTF-8 CLI assumed, else you'll need this: //header("Content-Type: text/html; charset=UTF-8;"); $timezones = array('en_GB' => 'Europe/London', 'ja_JP' => 'Asia/Tokyo', 'ja_JP@calendar=japanese' => 'Asia/Tokyo'); $now = new DateTime(); //DateTime is a core PHP class as of version 5.2.0 foreach($timezones as $locale => $timezone) { $calendar = IntlDateFormatter::GREGORIAN; if(strpos($locale, 'calendar=') !== false) { //slightly presumptuous as @calendar=gregorian also exists $calendar = IntlDateFormatter::TRADITIONAL; } $formatter = new IntlDateFormatter($locale, IntlDateFormatter::FULL, IntlDateFormatter::FULL, $timezone, $calendar); echo 'It is now: "' . $formatter->format($now) . '" in ' . "{$timezone}n"; } //Last line of output gives "平成 23 年" which is Heisei 23! ?> Korean numbers <?php //note that this script file is UTF-8 //UTF-8 CLI assumed, else you'll need this: //header("Content-Type: text/html; charset=UTF-8;"); //See: //http://en.wikipedia.org/wiki/Korean_numerals //http://askakorean.blogspot.com/2010/03/korean-language-series-sino- korean.html $number = 1234567890; $formatter = new NumberFormatter('ko_KR', NumberFormatter::SPELLOUT); echo "Korean spellout ({$formatter->getLocale()}):t" . $formatter->format($number) . "n"; //above gives [Korean spellout (en_GB): one thousand two hundred and thirty-four
  • 4. //million, five hundred and sixty-seven thousand, eight hundred and ninety] //ie. locale has fallen back to system default (in this case en_GB) //but ko_KR is a valid ICU locale string, so let's check: $formatter = new NumberFormatter('ko_KR', NumberFormatter::CURRENCY); echo "Korean currency ({$formatter->getLocale()}):t" . $formatter->format($number) . "n"; //above gives [Korean currency (ko): ₩1,234,567,890] which is correct //ok, so it looks like we don't have the rules for Korean spellout //we'll have to supply the NumberFormatter with our own ruleset. //the technical details of the ruleset format are at //http://userguide.icu-project.org/formatparse/numbers/rbnf-examples //BUT we *do* have a ruleset for Japanese spellout which we can modify and use! //(it's similar because it also counts in ten thousands and has non-Arabic numerals) //the Japanese spellout ruleset is this (construct a NumberFormatter for Japanese //spellout and then var_dump($formatter->getPattern())): // // //pattern for japanese spellout /* string(1520) "%financial: 0: 零; 1: 壱; 2: 弐; 3: 参; 4: 四; 5: 伍; 6: 六; 7: 七; 8: 八; 9: 九; 10: 拾; 11: 拾>%financial>; 20: <%financial<拾; 21: <%financial<拾>%financial>; 100: <%financial<百; 101: <%financial<百>%financial>; 1000: <%financial<千; 1001: <%financial<千>%financial>; 10000: <%financial<萬; 10001: <%financial<萬>%financial>; 100000000: <%financial<億; 100000001: <%financial<億>%financial>;
  • 5. 1000000000000: <%financial<兆; 1000000000001: <%financial<兆>%financial>; 10000000000000000: =#,##0=; -x: マイナス>%financial>; x.x: <%financial<点>%financial>; %traditional: 0: 〇; 1: 一; 2: 二; 3: 三; 4: 四; 5: 五; 6: 六; 7: 七; 8: 八; 9: 九; 10: 十; 11: 十>%traditional>; 20: <%traditional<十; 21: <%traditional<十>%traditional>; 100: 百; 101: 百>%traditional>; 200: <%traditional<百; 201: <%traditional<百>%traditional>; 1000: 千; 1001: 千>%traditional>; 2000: <%traditional<千; 2001: <%traditional<千>%traditional>; 10000: <%traditional<万; 10001: <%traditional<万>%traditional>; 100000000: <%traditional<億; 100000001: <%traditional<億>%traditional>; 1000000000000: <%traditional<兆; 1000000000001: <%traditional<兆>%traditional>; 10000000000000000: =#,##0=; -x: マイナス>%traditional>; x.x: <%traditional<・>%traditional>; " */ //basically, for Japanese we count in groups of ten thousands //and we have a traditional set of characters //and a financial (anti-forgery) set of characters. // //Korean also counts in groups of ten thousands
  • 6. //so let's modify this pattern for Korean //(note that we'll use only South Korean Sino-Korean numbers) $korean_pattern = '%hangul: 0: 영; 1: 일; 2: 이; 3: 삼; 4: 사; 5: 오; 6: 육; 7: 칠; 8: 팔; 9: 구; 10: 십; 11: 십>%hangul>; 20: <%hangul<십; 21: <%hangul<십>%hangul>; 100: 백; 101: 백>%hangul>; 200: <%hangul<백; 201: <%hangul<백>%hangul>; 1000: 천; 1001: 천>%hangul>; 2000: <%hangul<천; 2001: <%hangul<천>%hangul>; 10000: <%hangul<만; 10001: <%hangul<만>%hangul>; 100000000: <%hangul<억; 100000001: <%hangul<억>%hangul>; 1000000000000: <%hangul<조; 1000000000001: <%hangul<조>%hangul>; 10000000000000000: =#,##0=; -x: ->%hangul>; x.x: <%hangul<・>%hangul>; %hanja: 0: 〇; 1: 一; 2: 二; 3: 三; 4: 四; 5: 五; 6: 六; 7: 七; 8: 八; 9: 九; 10: 十; 11: 十>%hanja>; 20: <%hanja<十; 21: <%hanja<十>%hanja>; 100: 百; 101: 百>%hanja>;
  • 7. 200: <%hanja<百; 201: <%hanja<百>%hanja>; 1000: 千; 1001: 千>%hanja>; 2000: <%hanja<千; 2001: <%hanja<千>%hanja>; 10000: <%hanja<萬; 10001: <%hanja<萬>%hanja>; 100000000: <%hanja<億; 100000001: <%hanja<億>%hanja>; 1000000000000: <%hanja<兆; 1000000000001: <%hanja<兆>%hanja>; 10000000000000000: =#,##0=; -x: ->%hanja>; x.x: <%hanja<・>%hanja>;'; $formatter = new NumberFormatter('ko_KR', NumberFormatter::PATTERN_RULEBASED, $korean_pattern); $formatter->setTextAttribute(NumberFormatter::DEFAULT_RULESET, "%hangul"); $numbers = array_merge(range(0, 20), range(30, 100, 10), array(1000, 10000, 100000000, 1000000000000)); foreach($numbers as $number) { echo "{$number}: {$formatter->format($number)}\n"; } //outputs a correct list of Korean Hangul numbers ?> strftime (not on presentation) <?php //note that this script file is UTF-8 //UTF-8 CLI assumed, else you'll need this: //header("Content-Type: text/html; charset=UTF-8;"); //out of curiosity, let's see how PHP's core strftime() handles //some different locale date formats //BTW, you're not going to get far with setlocale() if you don't //have that particular locale supported on your OS! //on *nixes, something like: //> locale --all-locales //will give you a list of all installed locales //
  • 8. //you can give setlocale() a *list* of locales to try if //you're not sure how your OS is spelling it etc // //ICU (and therefore Intl) uses its own locales and is not dependent //on the operating system for the locale data $locales = array(array('fi_FI.utf8', 'fi_FI'), array('ja_JP.utf8', 'ja_JP'), array('fr_FR.utf8', 'fr_FR')); $format = "%A %d %B %Y"; //ie. "Wednesday 17 August 2011" foreach($locales as $locale_array) { $locale = setlocale(LC_TIME, $locale_array); echo "{$locale}:\t" . strftime($format) . "\n"; } //Output: //fi_FI.utf8: keskiviikko 17 elokuu 2011 //ja_JP.utf8: 水曜日 17 8 月 2011 //fr_FR.utf8: mercredi 17 août 2011 //Comment: //Not bad at all. The Japanese date won't be very natural-looking for a //native Japanese speaker as the day and year aren't quantified with the //appropriate character. //There also seem to be some implementation issues (http://uk.php.net/manual/en/function.strftime.php) //Intl extension is giving us more power and flexibility ?> Japanese financial numbers (not on presentation) <?php //note that this script file is UTF-8 //UTF-8 CLI assumed, else you'll need this: //header("Content-Type: text/html; charset=UTF-8;"); //SUPPORTED LOCALES FOR "SPELLOUT" ARE A LOT MORE LIMITED //THAN FOR DECIMAL OR CURRENCY ETC... // //from ICU website: //"ICU provides number spellout rules for several locales, //but not for all of the locales that ICU supports, and not all of the predefined rule types. //Also, as of release 2.6, some of the provided rules are known to be incomplete." $number = 1234567890; $formatter = new NumberFormatter('ja_JP', NumberFormatter::SPELLOUT); echo "Default Japanese spellout:\t" . $formatter->format($number) . "\n";
  • 9. //above gives [十二億三千四百五十六万七千八百九十] - the usual kanji numbers //see "Key/Type Definitions" at http://www.unicode.org/reports/tr35 $formatter = new NumberFormatter('ja_JP@numbers=jpanfin', NumberFormatter::SPELLOUT); echo "Modified locale spellout:\t" . $formatter->format($number) . "\n"; //above also gives [十二億三千四百五十六万七千八百九十] - not our financial kanji numbers! //Hmmmm, but if we now var_dump($formatter->getPattern()) we get: //pattern for japanese spellout //(interestingly, financial kanji here and at http://www.sljfaq.org/afaq/banknote-numbers.html differ) /* string(1520) "%financial: 0: 零; 1: 壱; 2: 弐; 3: 参; 4: 四; 5: 伍; 6: 六; 7: 七; 8: 八; 9: 九; 10: 拾; 11: 拾>%financial>; 20: <%financial<拾; 21: <%financial<拾>%financial>; 100: <%financial<百; 101: <%financial<百>%financial>; 1000: <%financial<千; 1001: <%financial<千>%financial>; 10000: <%financial<萬; 10001: <%financial<萬>%financial>; 100000000: <%financial<億; 100000001: <%financial<億>%financial>; 1000000000000: <%financial<兆; 1000000000001: <%financial<兆>%financial>; 10000000000000000: =#,##0=; -x: マイナス>%financial>; x.x: <%financial<点>%financial>; %traditional: 0: 〇;
  • 10. 1: 一; 2: 二; 3: 三; 4: 四; 5: 五; 6: 六; 7: 七; 8: 八; 9: 九; 10: 十; 11: 十>%traditional>; 20: <%traditional<十; 21: <%traditional<十>%traditional>; 100: 百; 101: 百>%traditional>; 200: <%traditional<百; 201: <%traditional<百>%traditional>; 1000: 千; 1001: 千>%traditional>; 2000: <%traditional<千; 2001: <%traditional<千>%traditional>; 10000: <%traditional<万; 10001: <%traditional<万>%traditional>; 100000000: <%traditional<億; 100000001: <%traditional<億>%traditional>; 1000000000000: <%traditional<兆; 1000000000001: <%traditional<兆>%traditional>; 10000000000000000: =#,##0=; -x: マイナス>%traditional>; x.x: <%traditional<・>%traditional>; " */ //so the financial kanji are in there but how to wrangle them out?? $formatter = new NumberFormatter('ja_JP', NumberFormatter::SPELLOUT); $formatter->setTextAttribute(NumberFormatter::DEFAULT_RULESET, "%financial"); echo "setTextAttribute spellout:\t" . $formatter->format($number) . "\n"; //above gives [拾弐億参千四百伍拾六萬七千八百九拾] - bingo! //now, out of curiosity //same formatter as above
  • 11. $formatter->setTextAttribute(NumberFormatter::DEFAULT_RULESET, "%traditional"); echo "setTextAttribute spellout:\t" . $formatter->format($number) . "\n"; //yes, this gives [十二億三千四百五十六万七千八百九十] //notice that the %traditional and %financial patterns //differ in more than just the characters used //(for example, look at each format for the value of 100). //let's take a look $numbers = array(100, 199, 200, 201, 1000, 1999, 2000, 2001); $traditional_formatter = new NumberFormatter('ja_JP', NumberFormatter::SPELLOUT); $financial_formatter = new NumberFormatter('ja_JP', NumberFormatter::SPELLOUT); $financial_formatter->setTextAttribute(NumberFormatter::DEFAULT_RULESET, "%financial"); foreach($numbers as $number) { echo "{$number} as traditional:\t" . $traditional_formatter->format($number) . "\n"; echo "{$number} as financial:\t" . $financial_formatter->format($number) . "\n"; echo "----------------\n"; } //outputs: /* 100 as traditional: 百 100 as financial: 壱百 ---------------- 199 as traditional: 百九十九 199 as financial: 壱百九拾九 ---------------- 200 as traditional: 二百 200 as financial: 弐百 ---------------- 201 as traditional: 二百一 201 as financial: 弐百壱 ---------------- 1000 as traditional: 千 1000 as financial: 壱千 ---------------- 1999 as traditional: 千九百九十九 1999 as financial: 壱千九百九拾九 ---------------- 2000 as traditional: 二千 2000 as financial: 弐千 ----------------
  • 12. 2001 as traditional: 二千一 2001 as financial: 弐千壱 ---------------- */ //We see that the different rules enable the financial spellout to write, say, "one thousand" instead of //the traditional "thousand". This clearly makes sense in an anti-forgery context. //User exercise: compare and contrast with PHP's core localeconv() ?> Locale::acceptFromHttp (not on presentation) <?php //note that this script file is UTF-8 //We can set Intl's locale based on the browser's HTTP_ACCEPT_LANGUAGE header. //Browsers send this header based on their "preferred language" setting. //Only power users would tinker with this setting directly, but we can assume //that it is *usually* correct. //Google sites are quite good at using this header, try changing your //browser's preferred language setting and then visit your favourite //Google site! header("Content-Type: text/html; charset=UTF-8;"); echo 'Browser\'s Accept-Language header: ' . $_SERVER['HTTP_ACCEPT_LANGUAGE'] . '<br>'; $browser_locale = Locale::acceptFromHttp($_SERVER['HTTP_ACCEPT_LANGUAGE']); echo 'Decided browser locale: ' . $browser_locale . '<br>'; Locale::setDefault($browser_locale); echo 'Intl default locale now: ' . Locale::getDefault() . '<br>'; //a check $all_variants = Locale::getAllVariants(Locale::getDefault()); echo 'All variants: '; print_r($all_variants); echo '<br>'; $language_name = Locale::getDisplayLanguage(Locale::getDefault()); echo 'Language display name: ' . $language_name . '<br>'; $region_name = Locale::getDisplayRegion(Locale::getDefault()); echo 'Region display name: ' . $region_name . '<br>'; $script_name = Locale::getDisplayScript(Locale::getDefault()); echo 'Script display name: ' . $script_name . '<br>';
  • 13. $variant_name = Locale::getDisplayVariant(Locale::getDefault()); echo 'Variant display name: ' . $variant_name . '<br>'; $keywords = Locale::getKeywords(Locale::getDefault()); echo 'Keywords: '; print_r($keywords); echo '<br>'; ?>