From 5f0ba2f2a07114667d9846ef19ffda6bf66a4f4c Mon Sep 17 00:00:00 2001 From: Frank Ronny Larsen Date: Fri, 5 Jul 2013 17:35:59 +0200 Subject: [PATCH 1/4] OPPG-413: Validator mostly ready. Huge problems with lists. --- h5p.classes.php | 256 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 256 insertions(+) diff --git a/h5p.classes.php b/h5p.classes.php index 36dc3b3..f7313c7 100644 --- a/h5p.classes.php +++ b/h5p.classes.php @@ -1071,11 +1071,267 @@ class H5PCore { * * @param array $library * With keys machineName, majorVersion and minorVersion + * @param boolean $folderName + * Use hyphen instead of space in returned string. * @return string * On the form {machineName} {majorVersion}.{minorVersion} */ public function libraryToString($library, $folderName = FALSE) { return $library['machineName'] . ($folderName ? '-' : ' ') . $library['majorVersion'] . '.' . $library['minorVersion']; } + + /** + * Writes library data as string on the form {machineName} {majorVersion}.{minorVersion} + * + * @param string $libraryString + * On the form {machineName} {majorVersion}.{minorVersion} + * @return array|FALSE + * With keys machineName, majorVersion and minorVersion. + * Returns FALSE only if string is not parsable in the normal library + * string formats "Lib.Name-x.y" or "Lib.Name x.y" + */ + public function libraryFromString($libraryString) { + $re = '/^([\w0-9\-\.]{1,255})[\-\ ]([0-9]{1,5})\.([0-9]{1,5})$/i'; + $matches = array(); + $res = preg_match($re, $libraryString, $matches); + if ($res) { + return array( + 'machineName' => $matches[1], + 'majorVersion' => $matches[2], + 'minorVersion' => $matches[3] + ); + } + return FALSE; + } } + +/** + * Functions for validating basic types from H5P library semantics. + */ +class H5PContentValidator { + public $h5pF; + public $h5pC; + private $typeMap; + private $semanticsCache; + + /** + * Constructor for the H5PContentValidator + * + * @param object $H5PFramework + * The frameworks implementation of the H5PFrameworkInterface + * @param object $H5PCore + * The main H5PCore instance + */ + public function __construct($H5PFramework, $H5PCore) { + $this->h5pF = $H5PFramework; + $this->h5pC = $H5PCore; + $this->typeMap = array( + 'text' => 'validateText', + 'number' => 'validateNumber', + 'boolean' => 'validateBoolean', + 'list' => 'validateList', + 'group' => 'validateGroup', + 'image' => 'validateImage', + 'video' => 'validateVideo', + 'audio' => 'validateAudio', + 'select' => 'validateSelect', + 'library' => 'validateLibrary', + ); + // Cache for semantics used within this validation to avoid unneccessary + // json_decodes if a library is used multiple times. + $this->semanticsCache = array(); + } + + /** + * Validate the given value from content with the matching semantics + * object from semantics + * + * Function will recurse via external functions for container objects like + * 'list', 'group' and 'library'. + * + * @param object $value + * Object to be verified. May be a string or an array. (normal or keyed) + * @param object $semantics + * Semantics object from semantics.json for main library. Further + * semantics will be loaded from H5PFramework if any libraries are + * found within the value data. + */ + public function validateBySemantics(&$value, $semantics) { + // dd('validateBySemantics'); + $fakebaseobject = (object) array( + 'type' => 'group', + 'fields' => $semantics, + ); + $this->validateGroup($value, $fakebaseobject); + } + + /** + * Validate given text value against text semantics. + */ + public function validateText(&$text, $semantics) { + // dd('validateText'); + if ($semantics->widget && $semantics->widget == 'html') { + // FIXME: Implicit tags added in javascript are NOT vissible in + // $semantics->tags (such as mathml etc) + $allowedtags = implode('', array_map(array($this, 'bracketTags'), $semantics->tags)); + // Strip invalid HTML tags. + $text = strip_tags($text, $allowedtags); + } + else { + // Filter text to plain text. + $text = htmlspecialchars($text, ENT_QUOTES, 'UTF-8'); + } + // TODO: Check if string is within allowed length + // TODO: Check if string is according to optional regexp in semantics + } + private function bracketTags($tag) { + return '<'.$tag.'>'; + } + + /** + * Validate given value against number semantics + */ + public function validateNumber(&$number, $semantics) { + // Validate that $number is indeed a number + if (!is_numeric($number)) { + $number = 0; + } + // TODO: Check if number is within valid bounds. + // TODO: Check if number is within allowed bounds even if step value is set. + // TODO: Check if number has proper number of decimals. + } + + /** + * Validate given value against boolean semantics + */ + public function validateBoolean(&$bool, $semantics) { + if (!is_bool($bool)) { + $bool = FALSE; + } + } + + /** + * Validate select values + */ + public function validateSelect(&$select, $semantics) { + if (!in_array($select, array_map(array($this, 'map_object_value'), $semantics->options))) { + $this->h5pF->setErrorMessage($this->h5pF->t('Invalid selected option in select.')); + $select = $semantics->options[0]->value; + } + } + private function map_object_value($o) { + return $o->value; + } + + /** + * Validate given list value agains list semantics. + * Will recurse into validating each item in the list according to the type. + */ + public function validateList(&$list, $semantics) { + // dd('validateList'); + $field = $semantics->field; + // WTF happens in content with lists of libraries? + if ($semantics->field->type == 'group' + && $semantics->field->fields[0]->type == 'library') { + $field = $semantics->field->fields[0]; + } + + $function = $this->typeMap[$field->type]; + + foreach ($list as $key => $value) { + $this->$function($value, $field); + } + // TODO: Check that list is not longer than allowed length + } + + /** + * Validate given image data + */ + public function validateImage(&$image, $semantics) { + $image->path = htmlspecialchars($image->path); + if ($image->mime && substr($image->mime, 0, 5) !== 'image') { + unset($image->mime); + } + } + + /** + * Validate given video data + */ + public function validateVideo(&$video, $semantics) { + $video->path = htmlspecialchars($video->path); + if ($video->mime && substr($video->mime, 0, 5) !== 'video') { + unset($video->mime); + } + } + + /** + * Validate given audio data + */ + public function validateAudio(&$audio, $semantics) { + $audio->path = htmlspecialchars($audio->path); + if ($audio->mime && substr($audio->mime, 0, 5) !== 'audio') { + unset($audio->mime); + } + } + + /** + * Validate given group value against group semantics. + * Will recurse into validating each group member. + */ + public function validateGroup(&$group, $semantics) { + // dd('validateGroup'); + // dd(print_r($group, TRUE)); + // dd(print_r($semantics, TRUE)); + foreach ($group as $key => &$value) { + // dd("time for $key"); + // Find semantics for name=$key + $found = FALSE; + foreach ($semantics->fields as $field) { + if ($field->name == $key) { + $function = $this->typeMap[$field->type]; + $found = TRUE; + // dd(print_r($field, TRUE)); + // dd("calling dr. $function"); + break; + } + } + if ($found) { + $this->$function($value, $field); + } + else { + $this->h5pF->setErrorMessage($this->h5pF->t('H5P internal error: no validator exists for ' . $key)); + // dd('WTF!?'); + // dd(print_r($group, TRUE)); + } + } + } + + /** + * Validate given library value against library semantics. + * + * Will recurse into validating the library's semantics too. + */ + public function validateLibrary(&$value, $semantics) { + // dd('validLibrary'); + // Check if provided library is within allowed options + if (in_array($value->library, $semantics->options)) { + if (isset($semanticsCache[$value->library])) { + $librarySemantics = $semanticsCache[$value->library]; + } + else { + $libspec = $this->h5pC->libraryFromString($value->library); + $library = $this->h5pF->loadLibrary($libspec['machineName'], $libspec['majorVersion'], $libspec['minorVersion']); + $librarySemantics = json_decode($library['semantics']); + $semanticsCache[$value->library] = $librarySemantics; + } + return $this->validateBySemantics($value->params, $librarySemantics); + } + else { + $this->h5pF->setErrorMessage($this->h5pF->t('Library used in content is not a valid library according to semantics')); + // dd('Value: '.print_r($value, TRUE)); + // dd('Semantics: '.print_r($semantics, TRUE)); + } + } +} + ?> \ No newline at end of file From 1ca9eff064010ae8dac410ec757068a5a0f3c7c3 Mon Sep 17 00:00:00 2001 From: Frank Ronny Larsen Date: Mon, 8 Jul 2013 14:56:48 +0200 Subject: [PATCH 2/4] OPPG-413: Validation fixes --- h5p.classes.php | 103 ++++++++++++++++++++++++++---------------------- 1 file changed, 56 insertions(+), 47 deletions(-) diff --git a/h5p.classes.php b/h5p.classes.php index f7313c7..6f61406 100644 --- a/h5p.classes.php +++ b/h5p.classes.php @@ -1157,29 +1157,30 @@ class H5PContentValidator { * found within the value data. */ public function validateBySemantics(&$value, $semantics) { - // dd('validateBySemantics'); $fakebaseobject = (object) array( 'type' => 'group', 'fields' => $semantics, ); - $this->validateGroup($value, $fakebaseobject); + $this->validateGroup($value, $fakebaseobject, FALSE); } /** * Validate given text value against text semantics. */ public function validateText(&$text, $semantics) { - // dd('validateText'); if ($semantics->widget && $semantics->widget == 'html') { // FIXME: Implicit tags added in javascript are NOT vissible in - // $semantics->tags (such as mathml etc) - $allowedtags = implode('', array_map(array($this, 'bracketTags'), $semantics->tags)); + // $semantics->tags (such as mathml etc). Need to include defaults. + $allowedtags = '
'; + if ($semantics->tags) { + $allowedtags = implode('', array_map(array($this, 'bracketTags'), $semantics->tags)); + } // Strip invalid HTML tags. $text = strip_tags($text, $allowedtags); } else { // Filter text to plain text. - $text = htmlspecialchars($text, ENT_QUOTES, 'UTF-8'); + $text = htmlspecialchars($text); } // TODO: Check if string is within allowed length // TODO: Check if string is according to optional regexp in semantics @@ -1214,7 +1215,16 @@ class H5PContentValidator { * Validate select values */ public function validateSelect(&$select, $semantics) { - if (!in_array($select, array_map(array($this, 'map_object_value'), $semantics->options))) { + // Special case for dynamicCheckboxes (valid options are generated live) + if ($semantics->widget == 'dynamicCheckboxes') { + // No practical way to guess valid parameters. Just make sure we don't + // have special chars here. Also, dynamicCheckboxes will insert an + // array, so iterate it. + foreach ($select as $key => $value) { + $select[$key] = htmlspecialchars($value); + } + } + else if (!in_array($select, array_map(array($this, 'map_object_value'), $semantics->options))) { $this->h5pF->setErrorMessage($this->h5pF->t('Invalid selected option in select.')); $select = $semantics->options[0]->value; } @@ -1228,13 +1238,7 @@ class H5PContentValidator { * Will recurse into validating each item in the list according to the type. */ public function validateList(&$list, $semantics) { - // dd('validateList'); $field = $semantics->field; - // WTF happens in content with lists of libraries? - if ($semantics->field->type == 'group' - && $semantics->field->fields[0]->type == 'library') { - $field = $semantics->field->fields[0]; - } $function = $this->typeMap[$field->type]; @@ -1258,9 +1262,11 @@ class H5PContentValidator { * Validate given video data */ public function validateVideo(&$video, $semantics) { - $video->path = htmlspecialchars($video->path); - if ($video->mime && substr($video->mime, 0, 5) !== 'video') { - unset($video->mime); + foreach ($video as $variant) { + $variant->path = htmlspecialchars($variant->path); + if ($variant->mime && substr($variant->mime, 0, 5) !== 'video') { + unset($variant->mime); + } } } @@ -1268,9 +1274,11 @@ class H5PContentValidator { * Validate given audio data */ public function validateAudio(&$audio, $semantics) { - $audio->path = htmlspecialchars($audio->path); - if ($audio->mime && substr($audio->mime, 0, 5) !== 'audio') { - unset($audio->mime); + foreach ($audio as $variant) { + $variant->path = htmlspecialchars($variant->path); + if ($variant->mime && substr($variant->mime, 0, 5) !== 'audio') { + unset($variant->mime); + } } } @@ -1278,30 +1286,35 @@ class H5PContentValidator { * Validate given group value against group semantics. * Will recurse into validating each group member. */ - public function validateGroup(&$group, $semantics) { - // dd('validateGroup'); - // dd(print_r($group, TRUE)); - // dd(print_r($semantics, TRUE)); - foreach ($group as $key => &$value) { - // dd("time for $key"); - // Find semantics for name=$key - $found = FALSE; - foreach ($semantics->fields as $field) { - if ($field->name == $key) { - $function = $this->typeMap[$field->type]; - $found = TRUE; - // dd(print_r($field, TRUE)); - // dd("calling dr. $function"); - break; + public function validateGroup(&$group, $semantics, $flatten = TRUE) { + // Groups with just one field are compressed in the editor to only output + // the child content. (Exemption for fake groups created by + // "validateBySemantics" above) + if (count($semantics->fields) == 1 && $flatten) { + $field = $semantics->fields[0]; + $function = $this->typeMap[$field->type]; + $this->$function($group, $field); + } + else { + foreach ($group as $key => &$value) { + // Find semantics for name=$key + $found = FALSE; + foreach ($semantics->fields as $field) { + if ($field->name == $key) { + $function = $this->typeMap[$field->type]; + $found = TRUE; + break; + } + } + if ($found) { + $this->$function($value, $field); + } + else { + // If validator is not found, something exists in content that does + // not have a corresponding semantics field. Remove it. + $this->h5pF->setErrorMessage($this->h5pF->t('H5P internal error: no validator exists for ' . $key)); + unset($group->$key); } - } - if ($found) { - $this->$function($value, $field); - } - else { - $this->h5pF->setErrorMessage($this->h5pF->t('H5P internal error: no validator exists for ' . $key)); - // dd('WTF!?'); - // dd(print_r($group, TRUE)); } } } @@ -1312,7 +1325,6 @@ class H5PContentValidator { * Will recurse into validating the library's semantics too. */ public function validateLibrary(&$value, $semantics) { - // dd('validLibrary'); // Check if provided library is within allowed options if (in_array($value->library, $semantics->options)) { if (isset($semanticsCache[$value->library])) { @@ -1324,14 +1336,11 @@ class H5PContentValidator { $librarySemantics = json_decode($library['semantics']); $semanticsCache[$value->library] = $librarySemantics; } - return $this->validateBySemantics($value->params, $librarySemantics); + $this->validateBySemantics($value->params, $librarySemantics); } else { $this->h5pF->setErrorMessage($this->h5pF->t('Library used in content is not a valid library according to semantics')); - // dd('Value: '.print_r($value, TRUE)); - // dd('Semantics: '.print_r($semantics, TRUE)); } } } - ?> \ No newline at end of file From 35e2623e1b3975fedbf8cd70585ea3694aa3b6b4 Mon Sep 17 00:00:00 2001 From: Frank Ronny Larsen Date: Mon, 8 Jul 2013 15:28:45 +0200 Subject: [PATCH 3/4] OPPG-413: Validation of specific limitations from semantics. --- h5p.classes.php | 48 +++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 41 insertions(+), 7 deletions(-) diff --git a/h5p.classes.php b/h5p.classes.php index 6f61406..78f9bf4 100644 --- a/h5p.classes.php +++ b/h5p.classes.php @@ -1182,8 +1182,21 @@ class H5PContentValidator { // Filter text to plain text. $text = htmlspecialchars($text); } - // TODO: Check if string is within allowed length - // TODO: Check if string is according to optional regexp in semantics + // Check if string is within allowed length + if (isset($semantics->maxLength)) { + $text = mb_substr($text, 0, $semantics->maxLength); + } + // Check if string is according to optional regexp in semantics + if (isset($semantics->regexp)) { + $pattern = $semantics->regexp->pattern; + $pattern .= isset($semantics->regexp->modifiers) ? $semantics->regexp->modifiers : ''; + if (preg_match($pattern, $text) === 0) { + // Note: explicitly ignore return value FALSE, to avoid removing text + // if regexp is invalid... + $this->h5pF->setErrorMessage($this->h5pF->t('Provided string is not valid according to regexp in semantics.')); + $text = ''; + } + } } private function bracketTags($tag) { return '<'.$tag.'>'; @@ -1197,9 +1210,25 @@ class H5PContentValidator { if (!is_numeric($number)) { $number = 0; } - // TODO: Check if number is within valid bounds. - // TODO: Check if number is within allowed bounds even if step value is set. - // TODO: Check if number has proper number of decimals. + // Check if number is within valid bounds. Move within bounds if not. + if (isset($semantics->min) && $number < $semantics->min) { + $number = $semantics->min; + } + if (isset($semantics->max) && $number > $semantics->max) { + $number = $semantics->max; + } + // Check if number is within allowed bounds even if step value is set. + if (isset($semantics->step)) { + $testnumber = $number - (isset($semantics->min) ? $semantics->min : 0); + $rest = $testnumber % $semantics->step; + if ($rest !== 0) { + $number -= $rest; + } + } + // Check if number has proper number of decimals. + if (isset($semantics->decimals)) { + $number = round($number, $semantics->decimals); + } } /** @@ -1239,13 +1268,18 @@ class H5PContentValidator { */ public function validateList(&$list, $semantics) { $field = $semantics->field; - $function = $this->typeMap[$field->type]; + // Check that list is not longer than allowed length. We do this before + // iterating to avoid unneccessary work. + if (isset($semantics->max)) { + array_splice($list, $semantics->max); + } + + // Validate each element in list. foreach ($list as $key => $value) { $this->$function($value, $field); } - // TODO: Check that list is not longer than allowed length } /** From d2e35589279a401f6c103e13cebbb72146a87f9b Mon Sep 17 00:00:00 2001 From: Frank Ronny Larsen Date: Mon, 8 Jul 2013 16:15:54 +0200 Subject: [PATCH 4/4] OPPG-413: Enable caching, fill default tag list for HTML validation --- h5p.classes.php | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/h5p.classes.php b/h5p.classes.php index 78f9bf4..716686c 100644 --- a/h5p.classes.php +++ b/h5p.classes.php @@ -1169,12 +1169,27 @@ class H5PContentValidator { */ public function validateText(&$text, $semantics) { if ($semantics->widget && $semantics->widget == 'html') { - // FIXME: Implicit tags added in javascript are NOT vissible in - // $semantics->tags (such as mathml etc). Need to include defaults. - $allowedtags = '
'; - if ($semantics->tags) { - $allowedtags = implode('', array_map(array($this, 'bracketTags'), $semantics->tags)); + // Build allowed tag list, based in $semantics->tags and known defaults. + // These four are always allowed. + $tags = array('div', 'span', 'p', 'br'); + if (isset($semantics->tags)) { + $tags = array_merge($tags, $semantics->tags); + // Add related tags for table etc. + if (in_array('table', $semantics->tags)) { + $tags = array_merge($tags, array('tr', 'td', 'th', 'colgroup', 'thead', 'tbody', 'tfoot')); + } + if (in_array('b', $semantics->tags)) { + $tags[] = 'strong'; + } + if (in_array('i', $semantics->tags)) { + $tags[] = 'em'; + } + if (in_array('ul', $semantics->tags) || in_array('ol', $semantics->tags)) { + $tags[] = 'li'; + } } + $allowedtags = implode('', array_map(array($this, 'bracketTags'), $tags)); + // Strip invalid HTML tags. $text = strip_tags($text, $allowedtags); }