From f6db8369261317a74835728c8d00467b9c0230de Mon Sep 17 00:00:00 2001 From: Marc Alexander Date: Wed, 11 May 2011 14:26:27 +0200 Subject: [PATCH] Merged nickvergessen's phpbb3-tools-trim-message replacing old get_sub_taged_string function --- root/portal/includes/functions.php | 171 +------ root/portal/includes/trim_message/bbcodes.php | 422 ++++++++++++++++++ .../includes/trim_message/trim_message.php | 190 ++++++++ 3 files changed, 629 insertions(+), 154 deletions(-) create mode 100644 root/portal/includes/trim_message/bbcodes.php create mode 100644 root/portal/includes/trim_message/trim_message.php diff --git a/root/portal/includes/functions.php b/root/portal/includes/functions.php index d30e3703..bdbcda7a 100644 --- a/root/portal/includes/functions.php +++ b/root/portal/includes/functions.php @@ -390,151 +390,30 @@ function character_limit(&$title, $limit = 0) } } -// Don't let them mess up the complete portal layout in cut messages and do some real AP magic -function is_valid_bbtag($str, $bbuid) +/** +* Cut post text to given length +* +* @param $message string post text +* @param $bbcode_uid string bbcode uid +* @param $length int The desired length +*/ +function get_sub_taged_string($message, $bbcode_uid, $length) { - return (substr($str,0,1) == '[') && (strpos($str, ':'.$bbuid.']') > 0); -} + global $phpbb_root_path, $phpEx; -function get_end_bbtag($tag, $bbuid) -{ - $etag = ''; - for ($i=0;$i')+1); - return substr($str,0,strpos($str, $lim, strlen($lim))+strlen($lim)); -} - -function get_sub_taged_string($str, $bbuid, $maxlen) -{ - $sl = $str; - $ret = ''; - $ntext = ''; - $lret = ''; - $i = 0; - $cnt = $maxlen; - $last = ''; - $arr = array(); - - while ((strlen($ntext) < $cnt) && (strlen($sl) > 0)) - { - $sr = ''; - if (substr($sl, 0, 1) == '[') - { - $sr = substr($sl,0,strpos($sl,']')+1); - } - /* GESCHLOSSENE HTML-TAGS BEACHTEN */ - if (substr($sl, 0, 2) == '')+1); - $ret .= $sr; - } - else if (is_valid_bbtag($sr, $bbuid)) - { - if ($sr[1] == '/') - { - /* entfernt das endtag aus dem tag array */ - $tarr = array(); - $j = 0; - foreach ($arr as $elem) - { - if (strcmp($elem[1],$sr) != 0) - { - $tarr[++$j] = $elem; - } - } - $arr = $tarr; - } - else - { - $arr[$i][0] = $sr; - $arr[++$i][1] = get_end_bbtag($sr, $bbuid); - } - $ret .= $sr; - } - else - { - $sr = get_next_word($sl); - $ret .= $sr; - $ntext .= $sr; - $last = $sr; - } - $sl = substr($sl, strlen($sr), strlen($sl)-strlen($sr)); + include(PORTAL_ROOT_PATH . 'includes/trim_message/bbcodes.' . $phpEx); } - $ap = ''; - - foreach ($arr as $elem) - { - $ap = $elem[1] . $ap; - } - - $ret .= $ap; - $ret = trim($ret); - if (substr($ret, -4) == ''; - } - $ret = add_endtag($ret); - $ret = $ret . '...'; - return $ret; + $object = new phpbb_trim_message($message, $bbcode_uid, $length); + // Ready to get parsed: + return $object->message(); } function ap_validate($str) @@ -693,22 +572,6 @@ function sql_table_exists($table_name) return false; } -/** -* check for invalid link tag at the end of a cut string -*/ -function add_endtag ($message = '') -{ - $check = (int) strrpos($message, ''; - } - - return $message; -} - /** * get topic tracking info for news * based on get_complete_tracking_info of phpBB3 diff --git a/root/portal/includes/trim_message/bbcodes.php b/root/portal/includes/trim_message/bbcodes.php new file mode 100644 index 00000000..0d0ff066 --- /dev/null +++ b/root/portal/includes/trim_message/bbcodes.php @@ -0,0 +1,422 @@ + +* @package trim_message +* @copyright 2011 +* @license http://opensource.org/licenses/gpl-license.php GNU Public License +* @version 1.0 +*/ + +/** +* @ignore +*/ +if (!defined('IN_PHPBB')) +{ + exit; +} + +/** +* phpbb_trim_message_bbcodes class +*/ +class phpbb_trim_message_bbcodes +{ + /** + * Variables + */ + private $message = ''; + private $bbcode_uid = ''; + private $bbcode_list = array(); + private $array_size = 0; + private $max_content_length = 0; + private $cur_content_length = 0; + private $cur_position = 0; + public $trim_position = 0; + public $is_trimmed = false; + + /** + * Constructor + * + * @param string $message parsed message you want to trim + * @param string $bbcode_uid bbcode_uid of the post + */ + public function __construct($message, $bbcode_uid, $content_length) + { + $this->message = $message; + $this->bbcode_uid = $bbcode_uid; + $this->max_content_length = $content_length; + $this->array_size = 0; + } + + public function get_bbcodes() + { + $bbcode_end_length = utf8_strlen(':' . $this->bbcode_uid . ']'); + $quote_end_length = utf8_strlen('":' . $this->bbcode_uid . ']'); + + $possible_bbcodes = explode('[', $this->message); + $content_length = $this->get_content_length($possible_bbcodes[0]); + if ($content_length >= $this->max_content_length) + { + $allowed_content_position = $this->get_content_position($possible_bbcodes[0], $this->max_content_length); + $this->trim_position = $this->cur_position + $allowed_content_position; + // As we did not touch any bbcodes yet, we can just skip all that. + if (!$this->max_content_length || ($content_length > $this->max_content_length)) + { + $this->is_trimmed = true; + } + return; + } + $this->cur_position += utf8_strlen($possible_bbcodes[0]) + 1; + $this->cur_content_length += $content_length; + + // Skip the first one. + array_shift($possible_bbcodes); + $num_possible_bbcodes = sizeof($possible_bbcodes); + $num_tested_bbcodes = 0; + $start_of_last_part = 0; + + $allow_close_quote = false; + + foreach ($possible_bbcodes as $part) + { + $num_tested_bbcodes++; + $exploded_parts = explode(':' . $this->bbcode_uid . ']', $part); + $num_parts = sizeof($exploded_parts); + + /** + * One element means we do not match an end before the next opening: + * String: [quote="[bbcode:uid]foobar[/bbcode:uid]":uid] + * Keys: ^^^^^^^ = 0 + */ + if ($num_parts == 1) + { + // 1 means, we are in [quote="":uid] and found another bbcode here. + if (utf8_strpos($exploded_parts[0], 'quote="') === 0) + { + $open_end_quote = utf8_strpos($this->message, '":' . $this->bbcode_uid . ']', $this->cur_position); + if ($open_end_quote !== false) + { + $close_quote = utf8_strpos($this->message, '[/quote:' . $this->bbcode_uid . ']', $open_end_quote); + if ($close_quote !== false) + { + $open_end_quote += $quote_end_length; + $this->open_bbcode('quote', $this->cur_position); + $this->bbcode_action('quote', 'open_end', $open_end_quote); + $this->cur_position += utf8_strlen($exploded_parts[0]); + + // We allow the 3-keys special-case, when we have found a beginning before... + $allow_close_quote = true; + } + } + } + } + /** + * Two element is hte normal case: + * String: [bbcode:uid]foobar + * Keys: ^^^^^^ = 0 ^^^^^^ = 1 + * String: [/bbcode:uid]foobar + * Keys: ^^^^^^^ = 0 ^^^^^^ = 1 + */ + elseif ($num_parts == 2) + { + // We matched it something ;) + if ($exploded_parts[0][0] != '/') + { + // Open BBCode-tag + $bbcode_tag = $this->filter_bbcode_tag($exploded_parts[0]); + + $this->open_bbcode($bbcode_tag, $this->cur_position); + $this->cur_position += utf8_strlen($exploded_parts[0]) + $bbcode_end_length; + $this->bbcode_action($bbcode_tag, 'open_end', $this->cur_position); + + if (!$allow_close_quote) + { + // If we allow a closing quote, we are in the username. + // We do not count that as content-length. + $content_length = $this->get_content_length($exploded_parts[1]); + $max_content_allowed = ($this->max_content_length - $this->cur_content_length); + if (($content_length >= $max_content_allowed) && !$this->trim_position) + { + $allowed_content_position = $this->get_content_position($exploded_parts[1], $max_content_allowed); + $this->trim_position = $this->cur_position + $allowed_content_position; + } + $this->cur_content_length += $content_length; + } + $this->cur_position += utf8_strlen($exploded_parts[1]); + } + else + { + // Close BBCode-tag + $bbcode_tag = $this->filter_bbcode_tag($exploded_parts[0]); + $bbcode_tag_extended = $this->filter_bbcode_tag($exploded_parts[0], false); + if ($bbcode_tag_extended == $bbcode_tag) + { + $bbcode_tag_extended = ''; + } + + $this->bbcode_action($bbcode_tag, 'close_start', $this->cur_position); + $this->cur_position += utf8_strlen($exploded_parts[0]) + $bbcode_end_length; + $this->bbcode_action($bbcode_tag, 'close_end', $this->cur_position, $bbcode_tag_extended); + + if (!$allow_close_quote) + { + // If we allow a closing quote, we are in the username. + // We do not count that as content-length. + $content_length = $this->get_content_length($exploded_parts[1]); + $max_content_allowed = ($this->max_content_length - $this->cur_content_length); + if (($content_length >= $max_content_allowed) && !$this->trim_position) + { + $allowed_content_position = $this->get_content_position($exploded_parts[1], $max_content_allowed); + $this->trim_position = $this->cur_position + $allowed_content_position; + } + $this->cur_content_length += $content_length; + } + $this->cur_position += utf8_strlen($exploded_parts[1]); + } + } + /** + * Three elements means we are closing the opening-quote and the BBCode from inside: + * String: [quote="[bbcode:uid]foo[/bbcode:uid]bar":uid]quotehere + * Keys: ^^^^^^^ = 0 ^^^^ = 1 ^^^^^^^^^ = 2 + */ + elseif ($num_parts == 3) + { + if (($exploded_parts[0][0] == '/') && (utf8_substr($exploded_parts[1], -6) == '"') && $allow_close_quote) + { + $bbcode_tag = $this->filter_bbcode_tag($exploded_parts[0]); + + $this->bbcode_action($bbcode_tag, 'close_start', $this->cur_position); + $this->cur_position += utf8_strlen($exploded_parts[0]) + $bbcode_end_length; + $this->bbcode_action($bbcode_tag, 'close_end', $this->cur_position); + $this->cur_position += utf8_strlen($exploded_parts[1]) + $bbcode_end_length; + + $content_length = $this->get_content_length($exploded_parts[2]); + $max_content_allowed = ($this->max_content_length - $this->cur_content_length); + if (($content_length >= $max_content_allowed) && !$this->trim_position) + { + $allowed_content_position = $this->get_content_position($exploded_parts[2], $max_content_allowed); + $this->trim_position = $this->cur_position + $allowed_content_position; + } + $this->cur_position += utf8_strlen($exploded_parts[2]); + $this->cur_content_length += $content_length; + + $allow_close_quote = false; + } + } + + // Increase by one for the [ we explode on. + $this->cur_position++; + } + + if ($this->cur_content_length > $this->max_content_length) + { + $this->is_trimmed = true; + } + } + + /** + * Add a bbcode to the bbcode-list + * + * @param string $tag BBCode-tag, Exp: code + * @param int $open_start start-position of the bbcode-open-tag + * (Exp: >[bbcode_list[] = array( + 'bbcode_tag' => $tag, + 'open_start' => $open_start, + 'open_end' => 0, + 'close_start' => 0, + 'close_end' => 0, + ); + $this->array_size++; + } + + /** + * Add position to a listed bbcode + * + * @param string $tag BBCode-tag, Exp: code + * @param string $part part can be one of the following: + * i) open_end => [code>]<[/code] + * ii) close_open => [code]>[ [code][/code>]< + * @param int $position start-position of the bbcode-open-tag + * @param int $tag_extended with the list-bbcode we get some + * information about the bbcode at the end + * of it. So we need to readd that. + */ + private function bbcode_action($tag, $part, $position, $tag_extended = false) + { + for ($i = 1; $i <= $this->array_size; $i++) + { + if ($this->bbcode_list[$this->array_size - $i]['bbcode_tag'] == $tag) + { + if (!$this->bbcode_list[$this->array_size - $i][$part]) + { + $this->bbcode_list[$this->array_size - $i][$part] = $position; + if ($tag_extended) + { + $this->bbcode_list[$this->array_size - $i]['bbcode_tag'] = $tag_extended; + } + return; + } + } + } + } + + /** + * Removes all BBcodes after a given position + */ + public function remove_bbcodes_after() + { + for ($i = 1; $i <= $this->array_size; $i++) + { + if ($this->bbcode_list[$this->array_size - $i]['open_start'] >= $this->trim_position) + { + unset($this->bbcode_list[$this->array_size - $i]); + } + } + + $this->array_size = sizeof($this->bbcode_list); + } + + /** + * Returns an array with BBCodes that need to be closed, after the position. + */ + public function get_open_bbcodes_after($position) + { + $bbcodes = array(); + for ($i = 1; $i <= $this->array_size; $i++) + { + if (($this->bbcode_list[$this->array_size - $i]['open_start'] < $position) && + ($this->bbcode_list[$this->array_size - $i]['close_start'] >= $position)) + { + $bbcodes[] = $this->bbcode_list[$this->array_size - $i]['bbcode_tag']; + } + } + return $bbcodes; + } + + /** + * Get the length of the content (substract code for smilie and url parsing) + * + * @param string $content Message to get the content length from + * Exp: text + * Content: ^^^^ + * + * @return int length of content without special markup + */ + static public function get_content_length($content) + { + $content_length = utf8_strlen($content); + $last_html_opening = $last_html_closing = $last_smiley = false; + while (($last_html_opening = utf8_strpos($content, '<', $last_html_closing)) !== false) + { + $last_html_closing = utf8_strpos($content, '>', $last_html_opening); + if (($smiley_code = utf8_substr($content, $last_html_opening + 7, ($last_html_closing - $last_html_opening - 11))) != '--') + { + if ($last_smiley == $smiley_code) + { + $content_length += utf8_strlen($smiley_code); + $last_smiley = false; + } + else + { + $last_smiley = $smiley_code; + } + } + $content_length -= ($last_html_closing - $last_html_opening) + 1; + } + return $content_length; + } + + /** + * Get the position in the text, where we need to cut the message. + * + * Exp: sampletext AL = 8 + * Content: ^^^^^^^^^^^^^^^^ Text-Position = 16 + * + * @param string $content Message to get the position in + * @param int $allowed_length Content length we are allowed to add. + * + * @return int position in the markup-text where we cut the text + */ + static public function get_content_position($content, $allowed_length) + { + if (utf8_strpos(utf8_substr($content, 0, $allowed_length), '<') === false) + { + /** + * If we did not find any HTML in our section, we can cut it. + * Exp: sampletext AL = 3 + * Content: ^^^ Text-Position = 3 + */ + return $allowed_length; + } + + $content_length = $allowed_length; + $start_position = 0; + $last_smiley = false; + while (($last_html_opening = utf8_strpos(utf8_substr($content, 0, $content_length), '<', $start_position)) !== false) + { + // foreach markup we find in the string, we enlarge our text-size. + $last_html_closing = utf8_strpos($content, '>', $last_html_opening); + $content_length += ($last_html_closing - $last_html_opening) + 1; + + $smiley_code = utf8_substr($content, $last_html_opening + 7, ($last_html_closing - $last_html_opening - 11)); + if (($smiley_code != '--') && (utf8_strpos($smiley_code, 'c="{SMILIES_PATH}/') === false)) + { + if ($last_smiley == $smiley_code) + { + $content_length -= utf8_strlen($smiley_code); + $last_smiley = false; + } + else + { + $last_smiley = $smiley_code; + } + } + + $start_position = $last_html_opening + 1; + } + + return $content_length; + } + + /** + * Filter BBCode-Tags: + * + * Exp: [/*:m] <= automatically added end of [*] + * Exp: [/list:x] <= end of [list] tag with list-style-element + * Exp: [bbcode=param1;param2] <= start of bbcode-tag with parameters + * + * @return string plain bbcode-tag + */ + static public function filter_bbcode_tag($bbcode_tag, $strip_information = true) + { + if ($bbcode_tag[0] == '/') + { + $bbcode_tag = utf8_substr($bbcode_tag, 1); + } + + if ($strip_information && ($bbcode_tag == '*:m')) + { + return '*'; + } + + if ($strip_information && (utf8_substr($bbcode_tag, 0, 5) == 'list:')) + { + return 'list'; + } + + if ($strip_information && (($equals = utf8_strpos($bbcode_tag, '=')) !== false)) + { + $bbcode_tag = utf8_substr($bbcode_tag, 0, $equals); + } + + return $bbcode_tag; + } +} diff --git a/root/portal/includes/trim_message/trim_message.php b/root/portal/includes/trim_message/trim_message.php new file mode 100644 index 00000000..d9c3d436 --- /dev/null +++ b/root/portal/includes/trim_message/trim_message.php @@ -0,0 +1,190 @@ + +* @package trim_message +* @copyright 2011 +* @license http://opensource.org/licenses/gpl-license.php GNU Public License +* @version 1.0 +*/ + +/** +* @ignore +*/ +if (!defined('IN_PHPBB')) +{ + exit; +} + +/** +* phpbb_trim_message class +*/ +class phpbb_trim_message +{ + /** + * Variables + */ + private $message = ''; + private $trimmed_message = ''; + private $bbcode_uid = ''; + private $append_str = ''; + private $length = 0; + private $length_tolerance = 0; + private $is_trimmed = null; + private $bbcodes = null; + + /** + * Constructor + * + * @param string $message parsed message you want to trim + * @param string $bbcode_uid bbcode_uid of the post + * @param int $length length the code should be trimmed to + * @param string $append_str text that is appended after trimmed message + * @param int $tolerance tolerance for the message: we don't trim it + * if it is shorter than length + tolerance. + */ + public function __construct($message, $bbcode_uid, $length, $append_str = ' [...]', $tolerance = 25) + { + $this->message = $message; + $this->bbcode_uid = $bbcode_uid; + $this->append_str = $append_str; + $this->length = (int) $length; + $this->length_tolerance = (int) $tolerance; + } + + /** + * Did we trim the message, or was it short enough? + */ + public function is_trimmed() + { + return (bool) $this->is_trimmed; + } + + /** + * Returns the message, trimmed or in full length + */ + public function message($force_full_length = false) + { + if (is_null($this->is_trimmed) && !$force_full_length) + { + $this->is_trimmed = $this->trim(); + } + + return ($this->is_trimmed && !$force_full_length) ? $this->trimmed_message : $this->message; + } + + /** + * Filter some easy cases where we can return the result easily + * + * @return bool Returns whether the message was trimmed or not. + */ + private function trim() + { + if (utf8_strlen($this->message) <= ($this->length + $this->length_tolerance)) + { + return false; + } + + if (!$this->bbcode_uid) + { + $this->trimmed_message = utf8_substr($this->message, 0, $this->length) . $this->append_str; + return true; + } + + $this->trim_action(); + return $this->bbcodes->is_trimmed; + } + + /** + * Do some magic... uhhh + */ + private function trim_action() + { + /** + * Prepare the difficult action + */ + $this->trimmed_message = $this->message; + $this->bbcodes = new phpbb_trim_message_bbcodes($this->trimmed_message, $this->bbcode_uid, $this->length); + + /** + * Step 1: Get a list of all BBCodes + */ + $this->bbcodes->get_bbcodes(); + + /** + * Step 2: Remove all bbcodes from the list, that are opened after + * the trim-position + */ + $this->bbcodes->remove_bbcodes_after(); + + /** + * Step 3: Trim message + */ + $this->trimmed_message = utf8_substr($this->message, 0, $this->bbcodes->trim_position); + + /** + * Step 4: i) Remove links/emails/smilies that are cut, somewhere + * in the middle + * ii) Renew trim-position if we did something + * iii) Append the message that is provided + */ + $this->remove_broken_links(); + $text_length = utf8_strlen($this->trimmed_message); + if ($this->bbcodes->is_trimmed) + { + $this->trimmed_message .= $this->append_str; + } + + /** + * Step 5: Close open BBCodes + */ + $open_bbcodes = $this->bbcodes->get_open_bbcodes_after($text_length); + $this->close_bbcodes($open_bbcodes); + } + + /** + * Removes broken smilies, emails and links without the URL-tag. + */ + private function remove_broken_links() + { + $open_brakets = substr_count($this->trimmed_message, '<'); + $closing_brakets = substr_count($this->trimmed_message, '>'); + if ($open_brakets != $closing_brakets) + { + /** + * There was an open braket for an unparsed link + * Example: <{cut}!-- l --> + */ + $this->trimmed_message = utf8_substr($this->trimmed_message, 0, utf8_strrpos($this->trimmed_message, '<')); + } + + $open_link = substr_count($this->trimmed_message, '{cut} + */ + $this->trimmed_message = utf8_substr($this->trimmed_message, 0, utf8_strrpos($this->trimmed_message, '