<?php
/*
Plugin Name: wp-utf8-excerpt
Version: 0.5.1
Author: Betty
Author URI: http://myfairland.net/
Plugin URI: http://myfairland.net/wp-utf8-excerpt/
Description: This plugin generates a better excerpt for multibyte language users (Chinese, for example). Besides, it keeps the html tags in the excerpt. 为使用多字节语言（如中文）的Wordpress用户提供更好的摘要算法，以解决Wordpress默认摘要算法只考虑西方语言的不足。此外，此插件产生的摘要可保留原文中的格式。
*/

/* $Id: wp-utf8-excerpt.php 23 2010-01-01 14:23:11Z Betty $ */

/* if the host doesn't support the mb_ functions, we have to define them. From Yskin's wp-CJK-excerpt, thanks to Yskin. */
if ( !function_exists('mb_strlen') ) {
	/**
	 * Minimal stand-in for mb_strlen() on hosts without the mbstring extension.
	 *
	 * For UTF-8 (the encoding name is compared case-insensitively, so both
	 * 'utf-8' and 'UTF-8' work) the result is the number of well-formed UTF-8
	 * sequences matched by the pattern below; bytes that match none of its
	 * alternatives are not counted. Any other encoding name falls back to the
	 * byte length reported by strlen().
	 *
	 * @param string $text   String to measure.
	 * @param string $encode Encoding name; defaults to 'UTF-8'.
	 * @return int Character count (UTF-8) or byte count (other encodings).
	 */
	function mb_strlen ($text, $encode = 'UTF-8') {
		$text = (string) $text;
		$encode = (string) $encode;

		if (strcasecmp($encode, 'UTF-8') == 0 || strcasecmp($encode, 'UTF8') == 0) {
			// preg_match_all() yields the match count, or false on a PCRE error;
			// the cast guarantees callers always receive an integer.
			return (int) preg_match_all('%(?:
					  [\x09\x0A\x0D\x20-\x7E]           # ASCII
					| [\xC2-\xDF][\x80-\xBF]            # non-overlong 2-byte
					|  \xE0[\xA0-\xBF][\x80-\xBF]       # excluding overlongs
					| [\xE1-\xEC\xEE\xEF][\x80-\xBF]{2} # straight 3-byte
					|  \xED[\x80-\x9F][\x80-\xBF]       # excluding surrogates
					|  \xF0[\x90-\xBF][\x80-\xBF]{2}    # planes 1-3
					| [\xF1-\xF3][\x80-\xBF]{3}         # planes 4-15
					|  \xF4[\x80-\x8F][\x80-\xBF]{2}    # plane 16
					)%xs',$text,$out);
		}

		return strlen($text);
	}
}

/* from Internet, author unknown */
if (!function_exists('mb_substr')) {
    /**
     * Minimal stand-in for mb_substr() on hosts without the mbstring extension.
     *
     * Treats $str as UTF-8: a byte above 0x7F starts a character and every
     * following byte in the range 0x80-0xBF is consumed as its continuation.
     * Unlike the native function, negative $start / $len values are not
     * interpreted relative to the end of the string: a non-positive $start
     * begins at offset 0 and a non-positive $len yields an empty string.
     *
     * @param string          $str      Source string.
     * @param int             $start    Number of characters to skip.
     * @param int|string|null $len      Maximum number of characters to return;
     *                                  '' or null means "up to the end".
     * @param string          $encoding Ignored; accepted for signature compatibility.
     * @return string The requested slice ('' when $start is past the end).
     */
    function mb_substr($str, $start, $len = '', $encoding="UTF-8"){
        $str = (string) $str;
        $limit = strlen($str);

        // Advance $start characters to find the byte offset of the slice start.
        for ($s = 0; $start > 0; --$start) {
            if ($s >= $limit)
                break;

            if ($str[$s] <= "\x7F")
                ++$s;
            else {
                ++$s; // step over the lead byte

                // Bounds check first, so the last character of the string
                // never triggers an out-of-range string offset.
                while ($s < $limit && $str[$s] >= "\x80" && $str[$s] <= "\xBF")
                    ++$s;
            }
        }

        // Strict test: a length of 0 must produce '', not "the rest of the string".
        if ($len === '' || $len === null)
            return (string) substr($str, $s);

        // Advance $len characters from the start offset to find the end offset.
        for ($e = $s; $len > 0; --$len) {
            if ($e >= $limit)
                break;

            if ($str[$e] <= "\x7F")
                ++$e;
            else {
                ++$e; // step over the lead byte

                while ($e < $limit && $str[$e] >= "\x80" && $str[$e] <= "\xBF")
                    ++$e;
            }
        }

        // Cast because substr() may return false on older PHP versions.
        return (string) substr($str, $s, $e - $s);
    }
}

/* the real excerpt function */
if (!function_exists('utf8_excerpt')) {
	/**
	 * Filter callback for 'get_the_excerpt' that builds a multibyte-aware excerpt.
	 *
	 * When $text is empty, an excerpt is generated from the global $post's
	 * content: the content is run through the 'the_content' filters, returned
	 * unchanged if it is already shorter than the configured length, cut at a
	 * <!--more--> marker if one is present, and otherwise stripped to the
	 * allowed tags and truncated after the configured number of characters
	 * (characters inside HTML tags are not counted). Whenever the function does
	 * not return early, "[......]" is appended, tags are balanced, and a
	 * "read more" link to the post is added -- this also applies to a
	 * non-empty $text passed in by the caller.
	 *
	 * Options read: home_excerpt_length (default 300), archive_excerpt_length
	 * (default 150), allowd_tag (default list below).
	 *
	 * @param string $text Existing excerpt; '' to have one generated.
	 * @return string Excerpt HTML.
	 */
	function utf8_excerpt ($text) {
		global $post;

		if ( '' == $text ) {
			// Load options, falling back to the built-in defaults when unset.
			$home_excerpt_length = get_option('home_excerpt_length');
			$archive_excerpt_length = get_option('archive_excerpt_length');
			$allowd_tag = get_option('allowd_tag');

			if ('' == $allowd_tag) {$allowd_tag = '<img><a> <em> <strong> <cite> <code> <ul> <ol> <li> <dl> <dt> <dd>';}
			if ('' == $home_excerpt_length) {$home_excerpt_length = 300;}
			if ('' == $archive_excerpt_length) {$archive_excerpt_length = 150;}

			// Use the home length only when the requested URL is exactly the site
			// home URL. The scheme is dropped from the comparison so the check also
			// matches on HTTPS sites, and missing $_SERVER keys (e.g. CLI) are tolerated.
			$request_host = isset($_SERVER['HTTP_HOST']) ? $_SERVER['HTTP_HOST'] : '';
			$request_uri = isset($_SERVER['REQUEST_URI']) ? $_SERVER['REQUEST_URI'] : '';
			$home_url = preg_replace('#^https?://#i', '', trailingslashit(get_option('home')));

			if ($request_host . $request_uri === $home_url) {
				$length = $home_excerpt_length;
			} else {
				$length = $archive_excerpt_length;
			}
			// Options arrive as strings; force an integer so the comparisons
			// below are always numeric.
			$length = (int) $length;

			// Fetch and normalise the post content.
			$text = isset($post->post_content) ? $post->post_content : '';
			$text = apply_filters('the_content', $text);
			$text = str_replace(']]>', ']]&gt;', $text);
			$text = trim($text);

			// Already short enough: return the full content untouched.
			if ($length > mb_strlen(strip_tags($text), 'UTF-8')) {
				return $text;
			}

			// An explicit <!--more--> marker wins over the length limit.
			$more_position = stripos ($text, "<!--more-->");
			if ($more_position !== false) {
				$text = substr ($text, 0, $more_position);
			}
			else {
				// Strip tags only now, otherwise the <!--more--> marker would be stripped too.
				$text = strip_tags($text, $allowd_tag);
				$text = trim($text);

				// Count only characters outside HTML tags. From Bas van Doren's
				// Advanced Excerpt, thanks to Bas van Doren.
				// The walk is capped at the string length so it always terminates,
				// even with an unclosed '<' or fewer countable characters than $length.
				$total = mb_strlen($text, 'UTF-8');
				$num = 0;
				$in_tag = false;
				for ($i = 0; $i < $total && ($num < $length || $in_tag); $i++) {
					$char = mb_substr($text, $i, 1, 'UTF-8');
					if ($char == '<')
						$in_tag = true;
					elseif ($char == '>')
						$in_tag = false;
					elseif (!$in_tag)
						$num++;
				}
				$text = mb_substr ($text, 0, $i, 'UTF-8');
			}

		}

		$text .= "[......]";
		$text = force_balance_tags($text);

		// Add a "read more" link; the permalink is escaped for use in an attribute.
		$text .= "<p class='read-more'><a href='".esc_url(get_permalink())."'>继续阅读</a></p>";
		return $text;
	}
}

// Hook utf8_excerpt() into the 'get_the_excerpt' filter at priority 9.
// NOTE(review): presumably 9 is chosen so this runs ahead of callbacks registered
// at the default priority -- confirm against WordPress core's own excerpt filter.
add_filter('get_the_excerpt', 'utf8_excerpt', 9);


/* the options  */
/**
 * Registers the plugin's "Excerpt" page under the Settings menu.
 *
 * Access is gated on the 'manage_options' capability rather than the
 * deprecated numeric user level 8; utf8_excerpt_options() renders the page.
 */
function utf8_excerpt_menu(){
	add_options_page('Excerpt Options', 'Excerpt', 'manage_options', __FILE__, 'utf8_excerpt_options');
}

/**
 * Renders the plugin's settings form.
 *
 * The form posts to options.php and saves three options:
 * home_excerpt_length, archive_excerpt_length and allowd_tag. Unset options
 * are shown with the same defaults utf8_excerpt() falls back to, and every
 * value is escaped before being printed into an HTML attribute.
 */
function utf8_excerpt_options() {
	// Resolve the displayed values up front, using one consistent "unset" test.
	$home_excerpt_length = get_option('home_excerpt_length');
	if ('' == $home_excerpt_length) { $home_excerpt_length = 300; }

	$archive_excerpt_length = get_option('archive_excerpt_length');
	if ('' == $archive_excerpt_length) { $archive_excerpt_length = 150; }

	$allowd_tag = get_option('allowd_tag');
	if ('' == $allowd_tag) { $allowd_tag = '<img><a> <em> <strong> <cite> <code> <ul> <ol> <li> <dl> <dt> <dd>'; }
?>
<div class="wrap">
    <h2>
        <?php _e( 'Excerpt Options' ); ?>
    </h2>

<form name="form1" method="post" action="options.php">
<?php wp_nonce_field('update-options'); ?>

<table class="form-table">
	<tr valign="top">
		<th scope="row"><?php _e('Length of excerpts on homepage:' ); ?></th>
		<td><input type="text" name="home_excerpt_length" value="<?php echo esc_attr($home_excerpt_length); ?>" /><?php _e('characters' ); ?></td>
	</tr>
	<tr valign="top">
		<th scope="row"><?php _e('Length of excerpts on archive pages:' ); ?></th>
		<td><input type="text" name="archive_excerpt_length" value="<?php echo esc_attr($archive_excerpt_length); ?>"/><?php _e('characters' ); ?></td>
	</tr>
	<tr valign="top">
		<th scope="row"><?php _e('Allow these HTML tags:' ); ?></th>
		<td><input type="text" name="allowd_tag" value="<?php echo esc_attr($allowd_tag); ?>" style="width:400px" /></td>
	</tr>
<!-- If the options are not set, load the default values. 
The default values are the same as the same-name variables in the main function.
Because they are local variables, I have to write the value out here, which I don't like. -->

</table>

<input type="hidden" name="action" value="update" />
<input type="hidden" name="page_options" value="home_excerpt_length,archive_excerpt_length, allowd_tag" />

<p class="submit">
<input type="submit" class="button-primary" name="Submit" value="<?php _e('Save Changes' ) ?>" />
</p>

</form>
</div>
<?php
}

// Run utf8_excerpt_menu() on the 'admin_menu' action so the options page gets registered.
add_action('admin_menu', 'utf8_excerpt_menu');
?>