Skype Status

My status
UTF-8 Encoding PDF Print E-mail

UTF-8 can encode any Unicode character, which removes the need to set a separate encoding for every language used on the site and allows output in multiple languages at the same time. I recommend using this encoding if you are setting up a multilingual e-shop on a MySQL5 / PHP5 platform.

The advantages of UTF-8 far outweigh its few disadvantages; they include compatibility with XML (feeds / sitemaps) and with other programs that use the UTF-8 encoding.

How to adjust osCommerce to use UTF-8

  1. For every language you use, edit the main language file (for example: catalog/includes/languages/greek.php and catalog/admin/includes/languages/greek.php). Below the line:
      @setlocale(LC_TIME, 'xx_XX.UTF8'); //xx_XX stands for your language
    Add
      mb_internal_encoding("UTF-8");
  2. In catalog(/admin)/includes/functions/database.php add mysql_query("SET NAMES 'UTF8'", $$link); after the lines
    if (USE_PCONNECT == 'true') {
          $$link = mysql_pconnect($server, $username, $password);
        } else {
          $$link = mysql_connect($server, $username, $password);
        }
  3. Check that the (text) table fields in your database are set to the utf8_general_ci collation.
  4. Make sure the language packs you are going to use are encoded in and set to UTF-8.
  5. In catalog/includes/functions/general.php replace the complete function tep_parse_search_string with:
        function tep_parse_search_string($search_str = '', &$objects) {
        $search_str = trim(mb_strtolower($search_str));

    // Break up $search_str on whitespace; quoted string will be reconstructed later
        $pieces = mb_split('[[:space:]]+', $search_str);
        $objects = array();
        $tmpstring = '';
        $flag = '';

        for ($k=0; $k<count($pieces); $k++) {
          while (mb_substr($pieces[$k], 0, 1) == '(') {
            $objects[] = '(';
            if (mb_strlen($pieces[$k]) > 1) {
              $pieces[$k] = mb_substr($pieces[$k], 1);
            } else {
              $pieces[$k] = '';
            }
          }

          $post_objects = array();

          while (mb_substr($pieces[$k], -1) == ')')  {
            $post_objects[] = ')';
            if (mb_strlen($pieces[$k]) > 1) {
              $pieces[$k] = mb_substr($pieces[$k], 0, -1);
            } else {
              $pieces[$k] = '';
            }
          }

    // Check individual words

          if ( (mb_substr($pieces[$k], -1) != '"') && (mb_substr($pieces[$k], 0, 1) != '"') ) {
            $objects[] = trim($pieces[$k]);

            for ($j=0; $j<count($post_objects); $j++) {
              $objects[] = $post_objects[$j];
            }
          } else {
    /* This means that the $piece is either the beginning or the end of a string.
       So, we'll slurp up the $pieces and stick them together until we get to the
       end of the string or run out of pieces.
    */

    // Add this word to the $tmpstring, starting the $tmpstring
            $tmpstring = trim(mb_ereg_replace('"', ' ', $pieces[$k]));

    // Check for one possible exception to the rule. That there is a single quoted word.
            if (mb_substr($pieces[$k], -1 ) == '"') {
    // Turn the flag off for future iterations
              $flag = 'off';

              $objects[] = trim($pieces[$k]);

              for ($j=0; $j<count($post_objects); $j++) {
                $objects[] = $post_objects[$j];
              }

              unset($tmpstring);

    // Stop looking for the end of the string and move onto the next word.
              continue;
            }

    // Otherwise, turn on the flag to indicate no quotes have been found attached to this word in the string.
            $flag = 'on';

    // Move on to the next word
            $k++;

    // Keep reading until the end of the string as long as the $flag is on

            while ( ($flag == 'on') && ($k < count($pieces)) ) {
              while (mb_substr($pieces[$k], -1) == ')') {
                $post_objects[] = ')';
                if (mb_strlen($pieces[$k]) > 1) {
                  $pieces[$k] = mb_substr($pieces[$k], 0, -1);
                } else {
                  $pieces[$k] = '';
                }
              }

    // If the word doesn't end in double quotes, append it to the $tmpstring.
              if (mb_substr($pieces[$k], -1) != '"') {
    // Tack this word onto the current string entity
                $tmpstring .= ' ' . $pieces[$k];

    // Move on to the next word
                $k++;
                continue;
              } else {
    /* If the $piece ends in double quotes, strip the double quotes, tack the
       $piece onto the tail of the string, push the $tmpstring onto the $haves,
       kill the $tmpstring, turn the $flag "off", and return.
    */
                $tmpstring .= ' ' . trim(mb_ereg_replace('"', ' ', $pieces[$k]));

    // Push the $tmpstring onto the array of stuff to search for
                $objects[] = trim($tmpstring);

                for ($j=0; $j<count($post_objects); $j++) {
                  $objects[] = $post_objects[$j];
                }

                unset($tmpstring);

    // Turn off the flag to exit the loop
                $flag = 'off';
              }
            }
          }
        }

    // add default logical operators if needed
        $temp = array();
        for($i=0; $i<(count($objects)-1); $i++) {
          $temp[] = $objects[$i];
          if ( ($objects[$i] != 'and') &&
               ($objects[$i] != 'or') &&
               ($objects[$i] != '(') &&
               ($objects[$i+1] != 'and') &&
               ($objects[$i+1] != 'or') &&
               ($objects[$i+1] != ')') ) {
            $temp[] = ADVANCED_SEARCH_DEFAULT_OPERATOR;
          }
        }
        $temp[] = $objects[$i];
        $objects = $temp;

        $keyword_count = 0;
        $operator_count = 0;
        $balance = 0;
        for($i=0; $i<count($objects); $i++) {
          if ($objects[$i] == '(') $balance --;
          if ($objects[$i] == ')') $balance ++;
          if ( ($objects[$i] == 'and') || ($objects[$i] == 'or') ) {
            $operator_count ++;
          } elseif ( ($objects[$i]) && ($objects[$i] != '(') && ($objects[$i] != ')') ) {
            $keyword_count ++;
          }
        }

        if ( ($operator_count < $keyword_count) && ($balance == 0) ) {
          return true;
        } else {
          return false;
        }
      }
  6. Change the function tep_break_string (catalog(/admin)/includes/functions/general.php) from:
      // Stock version: inserts $break_char into any run of non-space characters
      // that grows longer than $len. It walks the string with strlen()/substr(),
      // which count bytes, so a multibyte UTF-8 character is processed one byte
      // at a time and can be split by the inserted break character.
      function tep_break_string($string, $len, $break_char = '-') {
        $l = 0; // length of the current run of non-space characters
        $output = '';
        for ($i=0, $n=strlen($string); $i<$n; $i++) {
          $char = substr($string, $i, 1);
          if ($char != ' ') {
            $l++;
          } else {
            // A space ends the current run.
            $l = 0;
          }
          if ($l > $len) {
            // Run is too long: break before this character, which starts a new run.
            $l = 1;
            $output .= $break_char;
          }
          $output .= $char;
        }

        return $output;
      }

    to:
      function tep_break_string($string, $len, $break_char = '-') {
        $l = 0;
        $output = '';
        for ($i=0, $n=mb_strlen($string); $i<$n; $i++) {
          $char = mb_substr($string, $i, 1);
          if ($char != ' ') {
            $l++;
          } else {
            $l = 0;
          }
          if ($l > $len) {
            $l = 1;
            $output .= $break_char;
          }
          $output .= $char;
        }

        return $output;
      }
  7. In catalog(/admin)/includes/classes/email.php change:
         if ((strstr($to_name, "\n") != false) || (strstr($to_name, "\r") != false)) {
            return false;
          }

          if ((strstr($to_addr, "\n") != false) || (strstr($to_addr, "\r") != false)) {
            return false;
          }

          if ((strstr($subject, "\n") != false) || (strstr($subject, "\r") != false)) {
            return false;
          }

          if ((strstr($from_name, "\n") != false) || (strstr($from_name, "\r") != false)) {
            return false;
          }

          if ((strstr($from_addr, "\n") != false) || (strstr($from_addr, "\r") != false)) {
            return false;
          }

          $to = (($to_name != '') ? '"' . $to_name . '" ' : $to_addr);
          $from = (($from_name != '') ? '"' . $from_name . '" ' : $from_addr);

    to:
          if ((mb_strstr($to_name, "\n") != false) || (mb_strstr($to_name, "\r") != false)) {
            return false;
          }

          if ((mb_strstr($to_addr, "\n") != false) || (mb_strstr($to_addr, "\r") != false)) {
            return false;
          }

          if ((mb_strstr($subject, "\n") != false) || (mb_strstr($subject, "\r") != false)) {
            return false;
          }

          if ((mb_strstr($from_name, "\n") != false) || (mb_strstr($from_name, "\r") != false)) {
            return false;
          }

          if ((mb_strstr($from_addr, "\n") != false) || (mb_strstr($from_addr, "\r") != false)) {
            return false;
          }

          $tommod_encoding = strtolower(constant('CHARSET'));
          if($tommod_encoding=="utf-8"){
          mb_internal_encoding("utf-8");
          $subject = mb_encode_mimeheader($subject, "utf-8", "q");
          $from_name = '';
          $to_name = '';


          if(mb_ereg("<", $from_addr))
          {
          $froma = mb_split("<", $from_addr);
          $from_addr = str_replace(">", "", $froma[1]);
          }
         
          if(mb_ereg("<", $to_addr))
          {
          $toa = mb_split("<", $to_addr);
          $to_addr = str_replace(">", "", $toa[1]);
          }     
          }


          $to = (($to_name != '') ? '"' . $to_name . '" ' : $to_addr);
          $from = (($from_name != '') ? '"' . $from_name . '" ' : $from_addr);