Merge pull request #14 from marc1706/master

Merged nickvergessen's phpbb3-tools-trim-message replacing old get_sub_taged_string function
This commit is contained in:
Marc Alexander
2011-05-11 05:28:42 -07:00
3 changed files with 629 additions and 154 deletions

View File

@@ -390,151 +390,30 @@ function character_limit(&$title, $limit = 0)
}
}
// Don't let them mess up the complete portal layout in cut messages and do some real AP magic
function is_valid_bbtag($str, $bbuid)
/**
* Cut post text to given length
*
* @param $message string post text
* @param $bbcode_uid string bbcode uid
* @param $length int The desired length
*/
function get_sub_taged_string($message, $bbcode_uid, $length)
{
return (substr($str,0,1) == '[') && (strpos($str, ':'.$bbuid.']') > 0);
global $phpbb_root_path, $phpEx;
if(!class_exists('phpbb_trim_message'))
{
include(PORTAL_ROOT_PATH . 'includes/trim_message/trim_message.' . $phpEx);
}
function get_end_bbtag($tag, $bbuid)
if(!class_exists('phpbb_trim_message_bbcodes'))
{
$etag = '';
for ($i=0;$i<strlen($tag);++$i)
{
if ($tag[$i] == '[')
{
$etag .= $tag[$i] . '/';
}
else if (($tag[$i] == '=') || ($tag[$i] == ':'))
{
if ($tag[1] == '*')
{
$etag .= ':m:'.$bbuid.']';
}
else if (substr($tag, 0, 6) == '[list=')
{
$etag .= ':o:'.$bbuid.']';
}
else if (substr($tag, 0, 5) == '[list')
{
$etag .= ':u:'.$bbuid.']';
}
else
{
$etag .= ':'.$bbuid.']';
}
break;
}
else
{
$etag .= $tag[$i];
}
}
return $etag;
include(PORTAL_ROOT_PATH . 'includes/trim_message/bbcodes.' . $phpEx);
}
function get_next_word($str)
{
$ret = '';
for ($i=0;$i<strlen($str);++$i)
{
switch ($str[$i])
{
case ' ': //$ret .= ' '; break; break;
return $ret . ' ';
case '\\':
if ($str[$i+1] == 'n') return $ret . '\n';
case '[': if ($i != 0) return $ret;
default: $ret .= $str[$i];
}
}
return $ret;
}
function get_next_bbhtml_part($str)
{
$lim = substr($str,0,strpos($str,'>')+1);
return substr($str,0,strpos($str, $lim, strlen($lim))+strlen($lim));
}
function get_sub_taged_string($str, $bbuid, $maxlen)
{
$sl = $str;
$ret = '';
$ntext = '';
$lret = '';
$i = 0;
$cnt = $maxlen;
$last = '';
$arr = array();
while ((strlen($ntext) < $cnt) && (strlen($sl) > 0))
{
$sr = '';
if (substr($sl, 0, 1) == '[')
{
$sr = substr($sl,0,strpos($sl,']')+1);
}
/* GESCHLOSSENE HTML-TAGS BEACHTEN */
if (substr($sl, 0, 2) == '<!')
{
$sr = get_next_bbhtml_part($sl);
$ret .= $sr;
}
else if (substr($sl, 0, 1) == '<')
{
$sr = substr($sl,0,strpos($sl,'>')+1);
$ret .= $sr;
}
else if (is_valid_bbtag($sr, $bbuid))
{
if ($sr[1] == '/')
{
/* entfernt das endtag aus dem tag array */
$tarr = array();
$j = 0;
foreach ($arr as $elem)
{
if (strcmp($elem[1],$sr) != 0)
{
$tarr[++$j] = $elem;
}
}
$arr = $tarr;
}
else
{
$arr[$i][0] = $sr;
$arr[++$i][1] = get_end_bbtag($sr, $bbuid);
}
$ret .= $sr;
}
else
{
$sr = get_next_word($sl);
$ret .= $sr;
$ntext .= $sr;
$last = $sr;
}
$sl = substr($sl, strlen($sr), strlen($sl)-strlen($sr));
}
$ap = '';
foreach ($arr as $elem)
{
$ap = $elem[1] . $ap;
}
$ret .= $ap;
$ret = trim($ret);
if (substr($ret, -4) == '<!--')
{
$ret .= ' -->';
}
$ret = add_endtag($ret);
$ret = $ret . '...';
return $ret;
$object = new phpbb_trim_message($message, $bbcode_uid, $length);
// Ready to get parsed:
return $object->message();
}
function ap_validate($str)
@@ -693,22 +572,6 @@ function sql_table_exists($table_name)
return false;
}
/**
* check for invalid link tag at the end of a cut string
*/
function add_endtag ($message = '')
{
$check = (int) strrpos($message, '<!-- m --><a '); // @todo: add strripos back if we move to PHP5 !!
$check_2 = (int) strrpos($message, '</a><!--'); // @todo: add strripos back if we move to PHP5 !!
if (((isset($check) && $check > 0) && ($check_2 <= $check)) || ((isset($check) && $check > 0) && !isset($check_2)))
{
$message .= '</a><!-- m -->';
}
return $message;
}
/**
* get topic tracking info for news
* based on get_complete_tracking_info of phpBB3

View File

@@ -0,0 +1,422 @@
<?php
/**
* This file contains a class, to manage the bbcodes of a given phpbb
* message_parser message.
*
* @author Joas Schilling <nickvergessen at gmx dot de>
* @package trim_message
* @copyright 2011
* @license http://opensource.org/licenses/gpl-license.php GNU Public License
* @version 1.0
*/
/**
* @ignore
*/
if (!defined('IN_PHPBB'))
{
exit;
}
/**
* phpbb_trim_message_bbcodes class
*/
class phpbb_trim_message_bbcodes
{
	/**
	* Scanner state
	*
	* $message				parsed message that is scanned
	* $bbcode_uid			bbcode uid that marks real bbcode tags (":uid]")
	* $bbcode_list			list of found bbcodes; every entry is an array with
	*						the keys bbcode_tag, open_start, open_end,
	*						close_start and close_end (0 = not seen yet)
	* $array_size			number of entries in $bbcode_list
	* $max_content_length	number of content characters we are allowed to keep
	* $cur_content_length	number of content characters counted so far
	* $cur_position		current position of the scanner in $message
	* $trim_position		position in $message where the text has to be cut
	* $is_trimmed			whether the content is longer than the allowed length
	*/
	private $message = '';
	private $bbcode_uid = '';
	private $bbcode_list = array();
	private $array_size = 0;
	private $max_content_length = 0;
	private $cur_content_length = 0;
	private $cur_position = 0;
	public $trim_position = 0;
	public $is_trimmed = false;

	/**
	* Constructor
	*
	* @param string	$message		parsed message you want to trim
	* @param string	$bbcode_uid		bbcode_uid of the post
	* @param int	$content_length	maximum number of content characters
	*/
	public function __construct($message, $bbcode_uid, $content_length)
	{
		$this->message = $message;
		$this->bbcode_uid = $bbcode_uid;
		$this->max_content_length = $content_length;
		$this->array_size = 0;
	}

	/**
	* Scan the message: collect all bbcodes with their positions, count the
	* content and find the position where the message has to be cut.
	*
	* The results are stored in $trim_position and $is_trimmed and in the
	* internal bbcode list.
	*/
	public function get_bbcodes()
	{
		$uid_suffix = ':' . $this->bbcode_uid . ']';
		$bbcode_end_length = utf8_strlen($uid_suffix);
		$quote_end_length = utf8_strlen('&quot;' . $uid_suffix);

		$possible_bbcodes = explode('[', $this->message);

		// The text in front of the first "[" can not contain a bbcode.
		$leading_text = array_shift($possible_bbcodes);
		$content_length = self::get_content_length($leading_text);
		if ($content_length >= $this->max_content_length)
		{
			$this->trim_position = $this->cur_position + self::get_content_position($leading_text, $this->max_content_length);

			if (!$this->max_content_length || ($content_length > $this->max_content_length))
			{
				// As we did not touch any bbcodes yet, we can just skip all that.
				$this->is_trimmed = true;
				return;
			}

			// The leading text fills the limit exactly. We must keep on
			// scanning, otherwise content behind the first "[" would never
			// be noticed and the message would be returned in full length.
		}
		$this->cur_position += utf8_strlen($leading_text) + 1;
		$this->cur_content_length += $content_length;

		$allow_close_quote = false;
		foreach ($possible_bbcodes as $part)
		{
			// At this point cur_position is directly behind the "[" of $part.
			$exploded_parts = explode($uid_suffix, $part);
			$num_parts = sizeof($exploded_parts);
			$handled = false;

			/**
			* One element means we do not match an end before the next opening:
			* String: [quote="[bbcode:uid]foobar[/bbcode:uid]":uid]
			* Keys:    ^^^^^^^ = 0
			*/
			if ($num_parts == 1)
			{
				// We are in [quote="":uid] and found another bbcode in the username.
				if (utf8_strpos($exploded_parts[0], 'quote=&quot;') === 0)
				{
					$open_end_quote = utf8_strpos($this->message, '&quot;' . $uid_suffix, $this->cur_position);
					if ($open_end_quote !== false)
					{
						$close_quote = utf8_strpos($this->message, '[/quote' . $uid_suffix, $open_end_quote);
						if ($close_quote !== false)
						{
							$open_end_quote += $quote_end_length;
							$this->open_bbcode('quote', $this->cur_position);
							$this->bbcode_action('quote', 'open_end', $open_end_quote);
							$this->cur_position += utf8_strlen($exploded_parts[0]);

							// We allow the 3-keys special-case, when we have found a beginning before...
							$allow_close_quote = true;
							$handled = true;
						}
					}
				}
			}
			/**
			* Two elements is the normal case:
			* String: [bbcode:uid]foobar
			* Keys:    ^^^^^^ = 0  ^^^^^^ = 1
			* String: [/bbcode:uid]foobar
			* Keys:    ^^^^^^^ = 0  ^^^^^^ = 1
			*/
			elseif ($num_parts == 2)
			{
				list($raw_tag, $content) = $exploded_parts;
				$bbcode_tag = self::filter_bbcode_tag($raw_tag);

				if (!self::is_closing_tag($raw_tag))
				{
					// Open BBCode-tag
					$this->open_bbcode($bbcode_tag, $this->cur_position);
					$this->cur_position += utf8_strlen($raw_tag) + $bbcode_end_length;
					$this->bbcode_action($bbcode_tag, 'open_end', $this->cur_position);
				}
				else
				{
					// Close BBCode-tag. The extended tag keeps suffixes like
					// "list:o", which are needed to rebuild the closing tag.
					$bbcode_tag_extended = self::filter_bbcode_tag($raw_tag, false);
					if ($bbcode_tag_extended == $bbcode_tag)
					{
						$bbcode_tag_extended = '';
					}

					$this->bbcode_action($bbcode_tag, 'close_start', $this->cur_position);
					$this->cur_position += utf8_strlen($raw_tag) + $bbcode_end_length;
					$this->bbcode_action($bbcode_tag, 'close_end', $this->cur_position, $bbcode_tag_extended);
				}

				if (!$allow_close_quote)
				{
					// If we allow a closing quote, we are in the username.
					// We do not count that as content-length.
					$this->count_content($content, $this->cur_position);
				}
				$this->cur_position += utf8_strlen($content);
				$handled = true;
			}
			/**
			* Three elements means we are closing the opening-quote and the BBCode from inside:
			* String: [quote="[bbcode:uid]foo[/bbcode:uid]bar":uid]quotehere
			* Keys:                        ^^^^^^^ = 0  ^^^^ = 1  ^^^^^^^^^ = 2
			*/
			elseif (($num_parts == 3) && $allow_close_quote && self::is_closing_tag($exploded_parts[0]) && (utf8_substr($exploded_parts[1], -6) == '&quot;'))
			{
				$bbcode_tag = self::filter_bbcode_tag($exploded_parts[0]);
				$this->bbcode_action($bbcode_tag, 'close_start', $this->cur_position);
				$this->cur_position += utf8_strlen($exploded_parts[0]) + $bbcode_end_length;
				$this->bbcode_action($bbcode_tag, 'close_end', $this->cur_position);
				$this->cur_position += utf8_strlen($exploded_parts[1]) + $bbcode_end_length;

				$this->count_content($exploded_parts[2], $this->cur_position);
				$this->cur_position += utf8_strlen($exploded_parts[2]);

				$allow_close_quote = false;
				$handled = true;
			}

			if (!$handled)
			{
				// Not a bbcode, so the "[" and the text behind it are plain
				// text (Exp: "see [1]"). It has to be counted and skipped,
				// otherwise all following positions would be too small.
				if (!$allow_close_quote)
				{
					$this->count_content('[' . $part, $this->cur_position - 1);
				}
				$this->cur_position += utf8_strlen($part);
			}

			// Increase by one for the [ we explode on.
			$this->cur_position++;
		}

		if ($this->cur_content_length > $this->max_content_length)
		{
			$this->is_trimmed = true;
		}
	}

	/**
	* Count a piece of content and remember the trim-position, when the
	* allowed content length is reached within it for the first time.
	*
	* @param string	$content	markup-text that was found in the message
	* @param int	$position	position of $content in the message
	*/
	private function count_content($content, $position)
	{
		$content_length = self::get_content_length($content);
		$max_content_allowed = $this->max_content_length - $this->cur_content_length;

		if (($content_length >= $max_content_allowed) && !$this->trim_position)
		{
			$this->trim_position = $position + self::get_content_position($content, $max_content_allowed);
		}
		$this->cur_content_length += $content_length;
	}

	/**
	* Add a bbcode to the bbcode-list
	*
	* @param string	$tag		BBCode-tag, Exp: code
	* @param int	$open_start	position of the bbcode-open-tag in the
	*							message (directly behind its "[")
	*/
	private function open_bbcode($tag, $open_start)
	{
		$this->bbcode_list[] = array(
			'bbcode_tag'	=> $tag,
			'open_start'	=> $open_start,
			'open_end'		=> 0,
			'close_start'	=> 0,
			'close_end'		=> 0,
		);
		$this->array_size++;
	}

	/**
	* Add a position to the newest listed bbcode with the given tag, that
	* does not have this position yet. Nothing happens, if there is none.
	*
	* @param string	$tag			BBCode-tag, Exp: code
	* @param string	$part			part can be one of the following:
	*								i)   open_end    => [code>]<[/code]
	*								ii)  close_start => [code]>[</code]
	*								iii) close_end   => [code][/code>]<
	* @param int	$position		position that is stored for $part
	* @param mixed	$tag_extended	with the list-bbcode we get some
	*								information about the bbcode at the end
	*								of it. So we need to readd that: the
	*								stored tag is replaced by this string.
	*/
	private function bbcode_action($tag, $part, $position, $tag_extended = false)
	{
		for ($index = $this->array_size - 1; $index >= 0; $index--)
		{
			if ($this->bbcode_list[$index]['bbcode_tag'] != $tag)
			{
				continue;
			}

			// A position of 0 means "not set yet".
			if (!$this->bbcode_list[$index][$part])
			{
				$this->bbcode_list[$index][$part] = $position;
				if ($tag_extended)
				{
					$this->bbcode_list[$index]['bbcode_tag'] = $tag_extended;
				}
				return;
			}
		}
	}

	/**
	* Removes all BBcodes that are opened at or after the trim-position
	*/
	public function remove_bbcodes_after()
	{
		$kept_bbcodes = array();
		foreach ($this->bbcode_list as $bbcode)
		{
			if ($bbcode['open_start'] < $this->trim_position)
			{
				$kept_bbcodes[] = $bbcode;
			}
		}

		// The list is rebuilt without holes, because the other methods
		// address its entries by their numeric offset.
		$this->bbcode_list = $kept_bbcodes;
		$this->array_size = sizeof($kept_bbcodes);
	}

	/**
	* Returns an array with BBCodes that need to be closed, after the position.
	*
	* @param int	$position	position in the message where the text ends
	*
	* @return array	tags of all bbcodes that are opened before and closed
	*				at or after the position, innermost bbcode first
	*/
	public function get_open_bbcodes_after($position)
	{
		$bbcodes = array();
		foreach (array_reverse($this->bbcode_list) as $bbcode)
		{
			if (($bbcode['open_start'] < $position) && ($bbcode['close_start'] >= $position))
			{
				$bbcodes[] = $bbcode['bbcode_tag'];
			}
		}
		return $bbcodes;
	}

	/**
	* Get the length of the content (substract code for smilie and url parsing)
	*
	* Everything from a "<" up to the next ">" is treated as markup. A "<"
	* without a following ">" (the markup was split by a "[") turns the rest
	* of the string into markup.
	*
	* @param string	$content	Message to get the content length from
	*							Exp:		<markup>text<markup2>
	*							Content:	        ^^^^
	*
	* @return int	length of content without special markup
	*/
	public static function get_content_length($content)
	{
		$content_length = utf8_strlen($content);
		$last_html_closing = 0;
		$last_smiley = false;

		while (($last_html_opening = utf8_strpos($content, '<', $last_html_closing)) !== false)
		{
			$last_html_closing = utf8_strpos($content, '>', $last_html_opening);
			if ($last_html_closing === false)
			{
				// Searching again with "false" as offset would start at the
				// beginning of the string and never terminate.
				$content_length -= utf8_strlen($content) - $last_html_opening;
				break;
			}

			// A fragment from inside the tag is compared with the fragment
			// remembered from an earlier tag. If both are equal, its length
			// is added to the content length again.
			// NOTE(review): the offsets 7 and 11 look like they are meant for
			// the smiley comments "<!-- s{code} -->" - confirm.
			$smiley_code = utf8_substr($content, $last_html_opening + 7, ($last_html_closing - $last_html_opening - 11));
			if ($smiley_code != '--')
			{
				if ($last_smiley == $smiley_code)
				{
					$content_length += utf8_strlen($smiley_code);
					$last_smiley = false;
				}
				else
				{
					$last_smiley = $smiley_code;
				}
			}
			$content_length -= ($last_html_closing - $last_html_opening) + 1;
		}

		return $content_length;
	}

	/**
	* Get the position in the text, where we need to cut the message.
	*
	* Exp:		sample<markup>text<markup2>		AL = 8
	* Content:	^^^^^^^^^^^^^^^^				Text-Position = 16
	*
	* If a "<" without a following ">" is reached, its position is returned,
	* so the text is cut in front of the incomplete markup.
	*
	* @param string	$content		Message to get the position in
	* @param int	$allowed_length	Content length we are allowed to add.
	*
	* @return int	position in the markup-text where we cut the text
	*/
	public static function get_content_position($content, $allowed_length)
	{
		if (utf8_strpos(utf8_substr($content, 0, $allowed_length), '<') === false)
		{
			/**
			* If we did not find any HTML in our section, we can cut it.
			* Exp:		sample<markup>text<markup2>		AL = 3
			* Content:	^^^								Text-Position = 3
			*/
			return $allowed_length;
		}

		$content_length = $allowed_length;
		$start_position = 0;
		$last_smiley = false;
		while (($last_html_opening = utf8_strpos(utf8_substr($content, 0, $content_length), '<', $start_position)) !== false)
		{
			$last_html_closing = utf8_strpos($content, '>', $last_html_opening);
			if ($last_html_closing === false)
			{
				return $last_html_opening;
			}

			// foreach markup we find in the string, we enlarge our text-size.
			$content_length += ($last_html_closing - $last_html_opening) + 1;

			// Same fragment comparison as in get_content_length(), but the
			// tag that contains the smilies path is ignored.
			$smiley_code = utf8_substr($content, $last_html_opening + 7, ($last_html_closing - $last_html_opening - 11));
			if (($smiley_code != '--') && (utf8_strpos($smiley_code, 'c="{SMILIES_PATH}/') === false))
			{
				if ($last_smiley == $smiley_code)
				{
					$content_length -= utf8_strlen($smiley_code);
					$last_smiley = false;
				}
				else
				{
					$last_smiley = $smiley_code;
				}
			}
			$start_position = $last_html_opening + 1;
		}

		return $content_length;
	}

	/**
	* Filter BBCode-Tags:
	*
	* Exp:	[/*:m]	<= automatically added end of [*]
	* Exp:	[/list:x]	<= end of [list] tag with list-style-element
	* Exp:	[bbcode=param1;param2]	<= start of bbcode-tag with parameters
	*
	* @param string	$bbcode_tag			text between "[" and ":uid]"
	* @param bool	$strip_information	if false, only the leading slash
	*									is removed
	*
	* @return string	plain bbcode-tag
	*/
	public static function filter_bbcode_tag($bbcode_tag, $strip_information = true)
	{
		if (self::is_closing_tag($bbcode_tag))
		{
			$bbcode_tag = utf8_substr($bbcode_tag, 1);
		}

		if (!$strip_information)
		{
			return $bbcode_tag;
		}

		if ($bbcode_tag == '*:m')
		{
			return '*';
		}

		if (utf8_substr($bbcode_tag, 0, 5) == 'list:')
		{
			return 'list';
		}

		$equals = utf8_strpos($bbcode_tag, '=');
		if ($equals !== false)
		{
			$bbcode_tag = utf8_substr($bbcode_tag, 0, $equals);
		}

		return $bbcode_tag;
	}

	/**
	* Does the tag start with a slash? Also works on an empty string,
	* where reading the first character would raise an error message.
	*
	* @param string	$bbcode_tag	text between "[" and ":uid]"
	*
	* @return bool
	*/
	private static function is_closing_tag($bbcode_tag)
	{
		return strncmp($bbcode_tag, '/', 1) === 0;
	}
}

View File

@@ -0,0 +1,190 @@
<?php
/**
* This file contains a class, that is able to trim a message from the phpbb
* message_parser to a maximum length without breaking the bbcodes/smilies and
* links.
*
* @author Joas Schilling <nickvergessen at gmx dot de>
* @package trim_message
* @copyright 2011
* @license http://opensource.org/licenses/gpl-license.php GNU Public License
* @version 1.0
*/
/**
* @ignore
*/
if (!defined('IN_PHPBB'))
{
exit;
}
/**
* phpbb_trim_message class
*/
class phpbb_trim_message
{
	/**
	* Variables
	*
	* $message			the parsed message in full length
	* $trimmed_message	the trimmed version of the message
	* $bbcode_uid		bbcode uid of the message
	* $append_str		text that is appended to a trimmed message
	* $length			length the message should be trimmed to
	* $length_tolerance	messages up to length + tolerance are not trimmed
	* $is_trimmed		null until the message was trimmed once, afterwards
	*					whether it was trimmed or not
	* $bbcodes			phpbb_trim_message_bbcodes object of the message
	*/
	private $message = '';
	private $trimmed_message = '';
	private $bbcode_uid = '';
	private $append_str = '';
	private $length = 0;
	private $length_tolerance = 0;
	private $is_trimmed = null;
	private $bbcodes = null;

	/**
	* Constructor
	*
	* @param string	$message		parsed message you want to trim
	* @param string	$bbcode_uid		bbcode_uid of the post
	* @param int	$length			length the code should be trimmed to
	* @param string	$append_str		text that is appended after trimmed message
	* @param int	$tolerance		tolerance for the message: we don't trim it
	*								if it is shorter than length + tolerance.
	*/
	public function __construct($message, $bbcode_uid, $length, $append_str = ' [...]', $tolerance = 25)
	{
		$this->message = $message;
		$this->bbcode_uid = $bbcode_uid;
		$this->append_str = $append_str;
		$this->length = (int) $length;
		$this->length_tolerance = (int) $tolerance;
	}

	/**
	* Did we trim the message, or was it short enough?
	*
	* The message is only trimmed by message(), so this returns false as
	* long as message() was not called without $force_full_length.
	*
	* @return bool
	*/
	public function is_trimmed()
	{
		return (bool) $this->is_trimmed;
	}

	/**
	* Returns the message, trimmed or in full length
	*
	* @param bool	$force_full_length	return the untouched message
	*
	* @return string
	*/
	public function message($force_full_length = false)
	{
		if ($force_full_length)
		{
			return $this->message;
		}

		if (is_null($this->is_trimmed))
		{
			$this->is_trimmed = $this->trim();
		}

		return ($this->is_trimmed) ? $this->trimmed_message : $this->message;
	}

	/**
	* Filter some easy cases where we can return the result easily
	*
	* @return bool	Returns whether the message was trimmed or not.
	*/
	private function trim()
	{
		if (utf8_strlen($this->message) <= ($this->length + $this->length_tolerance))
		{
			return false;
		}

		if (!$this->bbcode_uid)
		{
			// No bbcodes to care about, but a parsed message can still
			// contain markup of links and smilies. If the cut went through
			// it, the broken rest has to be removed like in trim_action().
			$this->trimmed_message = utf8_substr($this->message, 0, $this->length);
			$this->remove_broken_links();
			$this->trimmed_message .= $this->append_str;
			return true;
		}

		$this->trim_action();
		return $this->bbcodes->is_trimmed;
	}

	/**
	* Trim a message with bbcodes: cut it at the position the bbcode scanner
	* calculated and close all bbcodes that are still open there.
	*/
	private function trim_action()
	{
		/**
		* Prepare the difficult action
		*/
		$this->trimmed_message = $this->message;
		$this->bbcodes = new phpbb_trim_message_bbcodes($this->trimmed_message, $this->bbcode_uid, $this->length);

		/**
		* Step 1: Get a list of all BBCodes
		*/
		$this->bbcodes->get_bbcodes();

		/**
		* Step 2: Remove all bbcodes from the list, that are opened after
		*			the trim-position
		*/
		$this->bbcodes->remove_bbcodes_after();

		/**
		* Step 3: Trim message
		*/
		$this->trimmed_message = utf8_substr($this->message, 0, $this->bbcodes->trim_position);

		/**
		* Step 4:	i)   Remove links/emails/smilies that are cut, somewhere
		*				 in the middle
		*			ii)  Renew trim-position if we did something
		*			iii) Append the message that is provided
		*/
		$this->remove_broken_links();

		// The length has to be taken before the text is appended, because
		// it is compared with positions in the original message.
		$text_length = utf8_strlen($this->trimmed_message);
		if ($this->bbcodes->is_trimmed)
		{
			$this->trimmed_message .= $this->append_str;
		}

		/**
		* Step 5: Close open BBCodes
		*/
		$open_bbcodes = $this->bbcodes->get_open_bbcodes_after($text_length);
		$this->close_bbcodes($open_bbcodes);
	}

	/**
	* Removes broken smilies, emails and links without the URL-tag.
	*/
	private function remove_broken_links()
	{
		$open_brakets = substr_count($this->trimmed_message, '<');
		$closing_brakets = substr_count($this->trimmed_message, '>');
		if ($open_brakets != $closing_brakets)
		{
			/**
			* There was an open braket for an unparsed link
			* Example: <{cut}!-- l -->
			*/
			$last_braket = utf8_strrpos($this->trimmed_message, '<');
			if ($last_braket !== false)
			{
				// Without this check a missing "<" would empty the message.
				$this->trimmed_message = utf8_substr($this->trimmed_message, 0, $last_braket);
			}
		}

		$open_link = substr_count($this->trimmed_message, '<!-- ');
		if (($open_link % 2) == 1)
		{
			/**
			* We did not close all links we opened, so we cut off the message
			* before the last open tag ;)
			* Example: <!-- l -->{cut}<!-- l -->
			*/
			$last_link = utf8_strrpos($this->trimmed_message, '<!-- ');
			if ($last_link !== false)
			{
				$this->trimmed_message = utf8_substr($this->trimmed_message, 0, $last_link);
			}
		}
	}

	/**
	* Close all open bbcodes
	*
	* @param array	$open_bbcodes	Array of all open bbcodes, in the order
	*								they have to be closed
	*/
	private function close_bbcodes($open_bbcodes)
	{
		foreach ($open_bbcodes as $bbcode_tag)
		{
			$this->trimmed_message .= '[/' . $bbcode_tag . ':' . $this->bbcode_uid . ']';
		}
	}
}