<?php
// $Id: transliteration.module,v 1.5.2.4 2009/11/28 12:48:53 smk Exp $

/**
 * @file
 * Converts non-latin text to US-ASCII and sanitizes file names.
 *
 * Uses data from the Text::Unidecode Perl library.
 * @see http://search.cpan.org/~sburke/Text-Unidecode-0.04/lib/Text/Unidecode.pm
 *
 * @author Stefan M. Kudwien (http://drupal.org/user/48898)
 */

/**
 * Implements hook_menu().
 *
 * Declares two local tasks below admin/settings/file-system: the default
 * "Settings" tab and a "Transliteration" tab whose form
 * (transliteration_retroactive, kept in transliteration.admin.inc) is
 * described as converting existing file names to US-ASCII.
 */
function transliteration_menu() {
  $base_path = 'admin/settings/file-system';

  // Default local task; it only carries a title, a weight and a type.
  $settings_tab = array(
    'title' => 'Settings',
    'weight' => -10,
    'type' => MENU_DEFAULT_LOCAL_TASK,
  );

  // Secondary tab, restricted to users who may administer the site
  // configuration.
  $transliteration_tab = array(
    'title' => 'Transliteration',
    'description' => 'Convert existing file names to US-ASCII.',
    'page callback' => 'drupal_get_form',
    'page arguments' => array('transliteration_retroactive'),
    'access arguments' => array('administer site configuration'),
    'file' => 'transliteration.admin.inc',
    'weight' => 10,
    'type' => MENU_LOCAL_TASK,
  );

  return array(
    $base_path . '/settings' => $settings_tab,
    $base_path . '/transliteration' => $transliteration_tab,
  );
}

/**
 * Implements hook_form_FORM_ID_alter().
 *
 * Extends the file system configuration form with a "Transliteration" item
 * holding two checkboxes, backed by the variables
 * 'transliteration_file_uploads' and 'transliteration_file_lowercase' (both
 * default to TRUE).
 */
function transliteration_form_system_file_system_settings_alter(&$form, &$form_state) {
  // Wrapper element that groups the module's settings under one title.
  $container = array(
    '#type' => 'item',
    '#title' => t('Transliteration'),
    '#value' => '',
  );

  // Toggle for sanitizing file names while they are being uploaded.
  $container['transliteration_file_uploads'] = array(
    '#type' => 'checkbox',
    '#title' => t('Enable transliteration of file names during upload.'),
    '#default_value' => variable_get('transliteration_file_uploads', TRUE),
  );

  // Toggle for lowercasing the transliterated file names.
  $container['transliteration_file_lowercase'] = array(
    '#type' => 'checkbox',
    '#title' => t('Lowercase transliterated file names.'),
    '#default_value' => variable_get('transliteration_file_lowercase', TRUE),
    '#description' => t('This is recommended to prevent issues with case-insensitive file systems.'),
  );

  $form['transliteration'] = $container;

  // Give the button row an explicit weight; presumably this keeps the buttons
  // below the newly added item.
  $form['buttons']['#weight'] = 1;
}

/**
 * Sanitize a file name or path.
 *
 * Transliterates the input to ASCII, replaces spaces with underscores and
 * removes every remaining character other than digits, ASCII letters,
 * underscore, dot, slash and hyphen. The result is lowercased when the
 * 'transliteration_file_lowercase' variable is enabled (the default).
 *
 * @param $filename
 *   A file name, or an array of file names. Arrays may be nested, as PHP
 *   produces for multi-value upload inputs; every entry is sanitized and the
 *   array keys are preserved.
 * @param $source_langcode
 *   Optional ISO 639 language code that denotes the language of the input.
 *   Used to apply language-specific variations and defaults to the current
 *   display language. If transliteration takes place during output (instead
 *   of creation) and the source language is not known at that time, it is
 *   recommended to set this argument to 'en' to produce consistent results
 *   for all enabled languages.
 * @return
 *   Cleaned file name, or an array of cleaned file names with the same
 *   structure as the input.
 */
function transliteration_clean_filename($filename, $source_langcode = NULL) {
  // Array input cannot go through the string pipeline below; recurse into
  // each entry instead so callers can pass upload name lists directly.
  if (is_array($filename)) {
    foreach ($filename as $key => $value) {
      $filename[$key] = transliteration_clean_filename($value, $source_langcode);
    }
    return $filename;
  }

  // Characters without an ASCII equivalent are dropped (empty replacement).
  $filename = transliteration_get($filename, '', $source_langcode);
  // Replace spaces; other whitespace is removed by the filter below.
  $filename = str_replace(' ', '_', $filename);
  // Remove remaining unsafe characters.
  $filename = preg_replace('![^0-9A-Za-z_./-]!', '', $filename);
  // Force lowercase to prevent issues on case-insensitive file systems.
  if (variable_get('transliteration_file_lowercase', TRUE)) {
    $filename = strtolower($filename);
  }
  return $filename;
}

/**
 * Transliterate UTF-8 text to ASCII.
 *
 * Attempts to represent text from any writing system in ASCII characters by
 * rendering its pronunciation in Roman letters. The actual conversion is
 * delegated to transliteration_process(); this wrapper only makes sure the
 * module's include file has been requested first.
 *
 * @param $input
 *   UTF-8 text input.
 * @param $unknown
 *   Replacement string for characters that do not have a suitable ASCII
 *   equivalent.
 * @param $source_langcode
 *   Optional ISO 639 language code that denotes the language of the input.
 *   Used to apply language-specific variations and defaults to the current
 *   display language. If transliteration takes place during output (instead
 *   of creation) and the source language is not known at that time, it is
 *   recommended to set this argument to 'en' to produce consistent results
 *   for all enabled languages.
 * @return
 *   Transliterated text.
 */
function transliteration_get($input, $unknown = '?', $source_langcode = NULL) {
  // Remember across calls that the include was already requested, so
  // module_load_include() runs at most once per request.
  static $include_requested = FALSE;

  if ($include_requested === FALSE) {
    // Presumably loads transliteration.inc, which is expected to define
    // transliteration_process().
    module_load_include('inc', 'transliteration');
    $include_requested = TRUE;
  }

  $transliterated = transliteration_process($input, $unknown, $source_langcode);
  return $transliterated;
}

/**
 * Implements hook_init().
 *
 * Sanitizes the names of uploaded files in $_FILES['files'] when the
 * 'transliteration_file_uploads' variable is enabled (the default). The
 * unaltered names are kept in $_FILES['files']['orig_name'].
 *
 * Both $_FILES and $_POST are request data and therefore untrusted, so their
 * shapes are checked before use.
 */
function transliteration_init() {
  if (empty($_FILES['files']) || !isset($_FILES['files']['name']) || !variable_get('transliteration_file_uploads', TRUE)) {
    return;
  }

  // Figure out language, which is available for node form submits. Only a
  // string can be a language code; a request may submit an array here, which
  // must not be used as an array offset.
  $langcode = NULL;
  if (!empty($_POST['language']) && is_string($_POST['language'])) {
    $languages = language_list();
    if (isset($languages[$_POST['language']])) {
      $langcode = $_POST['language'];
    }
  }

  $names = $_FILES['files']['name'];

  // An upload input named plain "files" (no brackets) yields a single string
  // instead of an array keyed by field; sanitize it directly.
  if (!is_array($names)) {
    $_FILES['files']['orig_name'] = $names;
    $_FILES['files']['name'] = transliteration_clean_filename($names, $langcode);
    return;
  }

  foreach ($names as $field => $filename) {
    // Keep a copy of the unaltered file name.
    $_FILES['files']['orig_name'][$field] = $filename;
    $_FILES['files']['name'][$field] = transliteration_clean_filename($filename, $langcode);
  }
}

