You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 

599 lines
20 KiB

  1. <?php
  2. /**
  3. * OEMBED PLUGIN
  4. *
  5. * Version history
  6. * 2008-07-31 - release v0.6 by Dwayne Bent <dbb.pub0@liqd.org>
  7. * 2019-09-01 - resuscitation & realignment with "Greebo"
  8. *
  9. * Licensed under the GPL 2 [http://www.gnu.org/licenses/gpl.html]
  10. *
  11. **/
  // Path constants: DOKU_INC and DOKU_PLUGIN are only defined here when
  // nothing has defined them before this file is loaded.
  if (!defined('DOKU_INC')) {
      define('DOKU_INC', realpath(__DIR__ . '/../../') . '/');
  }
  if (!defined('DOKU_PLUGIN')) {
      define('DOKU_PLUGIN', DOKU_INC . 'lib/plugins/');
  }
  // Base directory of this plugin; providers.xml and oembed.log are looked up
  // relative to it.
  define('OEMBED_BASE', DOKU_PLUGIN . 'oembed/');

  require_once DOKU_PLUGIN . 'syntax.php';
  require_once DOKU_INC . 'inc/HTTP/HTTPClient.php';
  17. class syntax_plugin_oembed extends DokuWiki_Syntax_Plugin {
  18. var $errors = array();
  19. var $version = '1.0';
  20. var $regex_master = '/^{{>\s*(?<url>.+?)(?:\s+(?<params>.+?))??\s*}}$/';
  /*
   * Reports the syntax type of this plugin.
   *
   * returns: the literal string 'substition' (spelling kept exactly as in
   *          the original code; presumably the name DokuWiki expects)
   */
  function getType()
  {
      $type = 'substition';
      return $type;
  }
  /*
   * Reports the allowed types for this plugin.
   *
   * returns: always an empty array (presumably meaning that no other syntax
   *          may be nested inside an oembed tag)
   */
  function getAllowedTypes()
  {
      $allowed = [];
      return $allowed;
  }
  /*
   * Reports the paragraph type of this plugin.
   *
   * returns: the literal string 'block'
   */
  function getPType()
  {
      $paragraph_type = 'block';
      return $paragraph_type;
  }
  /*
   * Reports the sort value of this plugin.
   *
   * returns: the integer 285 (presumably the priority of this syntax mode
   *          relative to other modes)
   */
  function getSort()
  {
      $sort = 285;
      return $sort;
  }
  /*
   * Registers the tag pattern of this plugin with the lexer.
   *
   * $mode: the mode the pattern is registered for
   *
   * Everything of the form {{>...}} (non-greedy) is handed to this plugin
   * under the name 'plugin_oembed'.
   */
  function connectTo($mode)
  {
      $pattern = '{{>.+?}}';
      $this->Lexer->addSpecialPattern($pattern, $mode, 'plugin_oembed');
  }
  /*
   * Handles one matched {{>...}} tag: parses it and resolves its OEmbed data.
   *
   * $match: the raw matched text
   * $state: lexer state; only DOKU_LEXER_SPECIAL is processed
   * $pos: position of the match (not used here)
   * $handler: the handler instance (not used here)
   *
   * returns:
   * false when the state is not DOKU_LEXER_SPECIAL or the tag cannot be
   * parsed, otherwise array:
   * oembed_data: result of resolve(), false when resolution failed
   * tag: parsed tag as returned by parseTag()
   * errors: messages collected while processing this tag
   */
  function handle($match, $state, $pos, Doku_Handler $handler){
      if($state != DOKU_LEXER_SPECIAL) return false;

      // error() appends to $this->errors and nothing else ever clears it, so
      // without this reset a tag would also report the errors of every tag
      // handled earlier by the same plugin instance
      $this->errors = array();

      $parsed_tag = $this->parseTag($match);
      if(!$parsed_tag) return false;

      // resolve first: it is what fills $this->errors
      $oembed_data = $this->resolve($parsed_tag);

      return array('oembed_data' => $oembed_data,
                   'tag' => $parsed_tag,
                   'errors' => $this->errors);
  }
  /*
   * Appends the rendered tag to the output document.
   *
   * $mode: output format; only 'xhtml' is supported
   * $renderer: renderer whose doc property receives the markup
   * $data: data array as returned by handle()
   *
   * returns: true when output was produced, false for unsupported formats
   */
  function render($mode, Doku_Renderer $renderer, $data) {
      if($mode != 'xhtml') return false;

      $renderer->doc .= $this->renderXHTML($data);
      // report success; the original returned false even after rendering
      return true;
  }
  53. /***************************************************************************
  54. * PARSE FUNCTIONS
  55. * Convert input strings to a usable form
  56. **************************************************************************/
  /*
   * Parse the entire matched string
   *
   * $tag: The entire matched string
   *
   * returns:
   * false when the string does not match $this->regex_master, otherwise
   * array of parsed data:
   * url: the target url
   * params: array of parsed parameters, see parseParams()
   */
  function parseTag($tag){
      if(!preg_match($this->regex_master, $tag, $matches)) return false;

      // the params group is optional; when it does not take part in the match
      // preg_match() leaves the key out of $matches entirely
      $params = isset($matches['params']) ? $matches['params'] : null;

      return array('url' => $matches['url'],
                   'params' => $this->parseParams($params));
  }
  /*
   * Parse the tag parameters
   *
   * $params: whitespace delimited list of parameters (no trailing or leading
   * whitespace); null or an empty string yields two empty lists
   *
   * A parameter has the form ?name[=value] (provider parameter) or
   * !name[=value] (plugin parameter). Anything else is ignored. A parameter
   * without "=value" is stored with the value null.
   *
   * returns:
   * array of parsed parameters:
   * provider: array of provider parameters:
   * name => value
   * plugin: array of plugin parameters
   * name => value
   */
  function parseParams($params){
      $parsed_params = array('provider' => array(), 'plugin' => array());
      if($params == null) return $parsed_params;

      // prefix character => list the parameter belongs to
      $targets = array('?' => 'provider', '!' => 'plugin');

      foreach(preg_split('/\s+/', $params, -1, PREG_SPLIT_NO_EMPTY) as $param){
          if(!preg_match('/^(?<type>!|\?)(?<name>\S+?)(?:=(?<value>\S+?))?$/', $param, $matches)) continue;

          // flags such as "!direct" have no value group in $matches
          $value = isset($matches['value']) ? $matches['value'] : null;
          $parsed_params[$targets[$matches['type']]][$matches['name']] = $value;
      }
      return $parsed_params;
  }
  /*
   * Parse an HTTP response containing OEmbed data
   *
   * $response: array of HTTP response data
   * status: numerical HTTP status code
   * headers: array of HTTP headers
   * name => value
   * body: body of the response
   *
   * returns: false on error (a message is recorded through error()) or
   * array of parsed oembed data:
   * name => value
   *
   * JSON responses are decoded into associative arrays, because every
   * consumer in this class indexes the result with array syntax.
   */
  function parseResponse($response){
      if($response['status'] != 200){
          // the original message also interpolated an undefined $tag variable;
          // the requested URL is not known inside this method
          return $this->error("Provider returned HTTP Status {$response['status']}");
      }

      if(!isset($response['headers']['content-type'])){
          return $this->error("Provider response has no Content-Type header");
      }
      if(!$type = $this->parseContentType($response['headers']['content-type'])) return false;

      // NOTE: the OEmbed "version" field is not validated; the check was
      // already disabled in the original code

      switch($type){
          case 'xml':
              if(!$xml = simplexml_load_string($response['body'])) return $this->error("Unable to parse XML: {$response['body']}");
              $oembed = array();
              foreach($xml as $element){
                  $oembed[$element->getName()] = (string) $element;
              }
              return $oembed;
          case 'json':
              $oembed = json_decode($response['body'], true);
              // null (invalid JSON) and bare scalars are not usable OEmbed data
              if(!is_array($oembed)) return $this->error("Unable to parse JSON: {$response['body']}");
              return $oembed;
          default:
              return $this->error("Internal error occured. Found type: {$type}");
      }
  }
  /*
   * Parse a content-type string from an HTTP header
   *
   * $header: The content-type string, e.g. "application/json; charset=utf-8"
   *
   * Only the media type in front of the first ";" is looked at, and it is
   * compared case-insensitively.
   *
   * returns: false on error (a message is recorded through error()) or
   * 'json' for JSON content or 'xml' for XML content
   */
  function parseContentType($header){
      if(!is_string($header) || !preg_match('/^\s*(?<type>[^;\s]+)(.*)?/', $header, $matches)){
          $shown = is_string($header) ? $header : gettype($header);
          return $this->error("Invalid Content-Type header: {$shown}");
      }

      $formats = array(
          'text/xml' => 'xml',
          'application/json' => 'json',
          // non-spec content-types, only supported for compatibility
          'application/xml' => 'xml',
          'text/json' => 'json',
          'text/plain' => 'json',
      );

      $type = strtolower($matches['type']);
      if(isset($formats[$type])) return $formats[$type];

      return $this->error("Unsupported Content-Type: {$matches['type']}");
  }
  163. /***************************************************************************
  164. * RESOLVE FUNCTIONS
  165. * Given parsed tag data get OEmbed data
  166. **************************************************************************/
  /*
   * Given parsed tag information, return OEmbed data
   *
   * $tag: Parsed tag information, as from parseTag()
   *
   * Order of attempts:
   * 1. the cache
   * 2. the direct API endpoint, when the tag carries the plugin parameter
   *    "direct" and the setting enable_direct_link is on (the outcome of
   *    that attempt is returned even when it failed)
   * 3. link discovery and the local provider list, each only when enabled,
   *    in the order selected by the setting resolution_priority
   *
   * returns: false on error or array of OEmbed data
   * oembed: array of OEmbed data as returned from provider
   * query_url: URL used to get the OEmbed data
   * target_url: URL to which the OEmbed data refers
   */
  function resolve($tag){
      $cached = $this->resolveCache($tag);
      if($cached) return $cached;

      $wants_direct = array_key_exists('direct', $tag['params']['plugin']);
      if($wants_direct && $this->getConf('enable_direct_link')){
          return $this->resolveDirect($tag);
      }

      // each attempt: (setting that enables it, method that performs it)
      $discovery = array('enable_link_discovery', 'resolveDiscovery');
      $provider_list = array('enable_provider_list', 'resolveProviderList');

      $priority = $this->getConf('resolution_priority');
      if($priority == 'link discovery'){
          $attempts = array($discovery, $provider_list);
      }
      else if($priority == 'provider list'){
          $attempts = array($provider_list, $discovery);
      }
      else{
          // unknown priority: nothing is tried
          $attempts = array();
      }

      foreach($attempts as $attempt){
          list($setting, $method) = $attempt;
          if(!$this->getConf($setting)) continue;

          $data = $this->$method($tag);
          if($data) return $data;
      }

      return $this->error("All resolution methods failed");
  }
  /*
   * Analogous to resolve(), using the cache for resolution
   *
   * No cache lookup is implemented: the method reports a miss for every tag.
   *
   * returns: always false
   */
  function resolveCache($tag)
  {
      $hit = false;
      return $hit;
  }
  /*
   * Analogous to resolve(), using a directly entered API endpoint for
   * resolution
   *
   * The tag URL is used as the API endpoint and all provider parameters are
   * appended to it as the query string. The target URL is taken from the
   * provider parameter "url"; it is null when that parameter was not given.
   */
  function resolveDirect($tag){
      $provider_params = $tag['params']['provider'];

      $query_url = $this->buildURL($tag['url'], $provider_params);
      if(!$response = $this->fetch($query_url)) return false;
      if(!$oembed = $this->parseResponse($response)) return false;

      // "?url=..." is optional in the tag syntax, so guard the lookup
      $target_url = isset($provider_params['url']) ? $provider_params['url'] : null;

      return array('oembed' => $oembed,
                   'query_url' => $query_url,
                   'target_url' => $target_url);
  }
  /*
   * Analogous to resolve(), using link discovery for resolution
   *
   * The target page is fetched, its OEmbed discovery link is extracted, the
   * provider parameters are appended to that link and the resulting URL is
   * queried for the OEmbed data.
   */
  function resolveDiscovery($tag){
      $page = $this->fetch($tag['url']);
      if(!$page) return false;

      $endpoint = $this->getOEmbedLink($page['body']);
      if(!$endpoint) return false;

      $query_url = $this->buildURL($endpoint, $tag['params']['provider']);

      $reply = $this->fetch($query_url);
      if(!$reply) return false;

      $oembed = $this->parseResponse($reply);
      if(!$oembed) return false;

      $result = array();
      $result['oembed'] = $oembed;
      $result['query_url'] = $query_url;
      $result['target_url'] = $tag['url'];
      return $result;
  }
  /*
   * Analogous to resolve(), using the local provider list for resolution
   *
   * The API endpoint comes from getProviderAPI(); a "{format}" placeholder
   * in it is replaced by the setting format_preference. The tag URL is sent
   * as the "url" query parameter and overrides a provider parameter of the
   * same name.
   */
  function resolveProviderList($tag){
      $endpoint = $this->getProviderAPI($tag['url']);
      if(!$endpoint) return false;

      $endpoint = str_replace("{format}", $this->getConf('format_preference'), $endpoint);
      $query = array_merge($tag['params']['provider'], array('url' => $tag['url']));
      $query_url = $this->buildURL($endpoint, $query);

      $reply = $this->fetch($query_url);
      if(!$reply) return false;

      $oembed = $this->parseResponse($reply);
      if(!$oembed) return false;

      $result = array();
      $result['oembed'] = $oembed;
      $result['query_url'] = $query_url;
      $result['target_url'] = $tag['url'];
      return $result;
  }
  253. /***************************************************************************
  254. * RENDER FUNCTIONS
  255. * Convert OEmbed data to a presentable form
  256. **************************************************************************/
  /*
   * Given the data array produced by handle(), produces an XHTML
   * representation
   *
   * $data: array as returned by handle():
   * oembed_data: OEmbed data as returned by resolve(), or false
   * tag: parsed tag, see parseTag()
   * errors: list of error messages
   *
   * returns: XHTML representation of OEmbed data, or an error list when no
   * OEmbed data is available
   */
  function renderXHTML($data){
      if(!$data['oembed_data']){
          $content = "OEmbed Error";
          $content .= "<ul>";
          $errors = isset($data['errors']) ? (array) $data['errors'] : array();
          foreach($errors as $error){
              // messages embed URLs and raw provider responses, so they must
              // never reach the page unescaped
              $content .= "<li>".htmlspecialchars((string) $error, ENT_QUOTES, 'UTF-8')."</li>";
          }
          $content .= "</ul>";
          return $content;
      }

      $oembed = $this->sanitizeOEmbed($data['oembed_data']['oembed']);
      // providers may omit optional fields; default every field read below so
      // a sparse response does not trigger undefined index notices
      $oembed += array('type' => '', 'title' => '', 'url' => '', 'html' => '',
                       'width' => '', 'height' => '', 'thumbnail_url' => '',
                       'thumbnail_width' => '', 'thumbnail_height' => '');

      $target_url = isset($data['oembed_data']['target_url']) ? (string) $data['oembed_data']['target_url'] : '';
      $target_url = htmlspecialchars($target_url, ENT_QUOTES, 'UTF-8');

      if(array_key_exists('thumbnail', $data['tag']['params']['plugin'])){
          if(!$oembed['thumbnail_url']){
              // no thumbnail available: fall back to a plain link
              return $this->renderXHTMLLink($data);
          }
          $img = '<img src="'.$oembed['thumbnail_url'].'" alt="'.$oembed['title'].'" title="'.$oembed['title'].'" height="'.$oembed['thumbnail_height'].'px" width="'.$oembed['thumbnail_width'].'px"/>';
          return '<a href="'.$target_url.'">'.$img.'</a>';
      }

      switch($oembed['type']){
          case 'photo':
              if($this->getConf('fullwidth_images')){
                  return '<img src="'.$oembed['url'].'" alt="'.$oembed['title'].'" title="'.$oembed['title'].'" width="100%" />';
              }
              return '<img src="'.$oembed['url'].'" alt="'.$oembed['title'].'" title="'.$oembed['title'].'" height="'.$oembed['height'].'px" width="'.$oembed['width'].'px"/>';
          case 'video':
          case 'rich':
              // NOTE(review): the provider's html is inserted verbatim (it is
              // deliberately exempt from sanitizeOEmbed()), so only trusted
              // providers should be resolvable
              return $oembed['html'];
          case 'link':
              return $this->renderXHTMLLink($data);
          default:
              return "OEmbed Error <ul><li>Unsupported media type: {$oembed['type']}</li></ul>";
      }
  }
  /*
   * Given the data array produced by handle(), produces an XHTML
   * representation as a simple link
   *
   * $data: array with the key oembed_data holding:
   * oembed: OEmbed data (array, or object with public properties)
   * target_url: URL the link points to
   *
   * The link text is "provider_name: title &ndash; author_name"; the
   * provider and author parts are left out when the field is missing or
   * empty. All values come from the remote provider and are escaped here.
   *
   * returns: XHTML representation of OEmbed data as a simple link
   */
  function renderXHTMLLink($data){
      // tolerate data decoded as stdClass (e.g. from older cached results)
      $oembed = (array) $data['oembed_data']['oembed'];

      $fields = array();
      foreach(array('provider_name', 'title', 'author_name') as $key){
          $usable = isset($oembed[$key]) && is_scalar($oembed[$key]);
          $fields[$key] = $usable ? htmlspecialchars((string) $oembed[$key], ENT_QUOTES, 'UTF-8') : '';
      }

      $text = '';
      if($fields['provider_name'] !== '') $text .= $fields['provider_name'].': ';
      $text .= $fields['title'];
      if($fields['author_name'] !== '') $text .= ' &ndash; '.$fields['author_name'];

      $href = isset($data['oembed_data']['target_url']) ? (string) $data['oembed_data']['target_url'] : '';
      $href = htmlspecialchars($href, ENT_QUOTES, 'UTF-8');

      return '<a class="urlextern" href="'.$href.'">'.$text.'</a>';
  }
  324. /***************************************************************************
  325. * UTILITY FUNCTIONS
  326. * Provides shared functionality
  327. **************************************************************************/
  /*
   * Records an error message and signals failure
   *
   * $msg: message to append to $this->errors
   *
   * returns: always false, so callers can write "return $this->error(...)"
   */
  function error($msg)
  {
      $this->errors[] = $msg;
      return false;
  }
  /*
   * Performs an HTTP request on the given URL through DokuHTTPClient
   *
   * $url: URL to perform the request on
   *
   * returns: false when sendRequest() fails (a message is recorded through
   * error()) or array representing the HTTP response
   * status: numerical HTTP status code
   * headers: array of HTTP headers
   * name => value
   * body: HTTP response body
   */
  function fetch($url)
  {
      $http = new DokuHTTPClient();

      if ($http->sendRequest($url)) {
          $response = array();
          $response['status'] = $http->status;
          $response['headers'] = $http->resp_headers;
          $response['body'] = $http->resp_body;
          return $response;
      }

      return $this->error("Error sending request to provider: {$url}");
  }
  /*
   * Given a base URL, create a new URL using the given parameters. Query
   * names and values are URL encoded.
   *
   * $base: base URL, any existing parameter values should be URL encoded.
   * $params: array of parameters to add to URL
   * name => value (a null value is sent as an empty string)
   *
   * The parameters are appended with "?" when $base contains no "?" yet and
   * with "&" otherwise. $base is returned untouched when $params is empty.
   *
   * returns: the new URL
   */
  function buildURL($base, $params){
      $pairs = array();
      foreach($params as $name => $value){
          // names come from the wiki text too, so they are encoded like values
          $pairs[] = rawurlencode((string) $name)."=".rawurlencode((string) $value);
      }
      if(!$pairs) return $base;

      $separator = (strpos($base, "?") === false) ? "?" : "&";
      return $base.$separator.implode("&", $pairs);
  }
  /*
   * Given raw HTML, tries to extract oembed discovery link
   *
   * Based on code by Keith Devens:
   * http://keithdevens.com/weblog/archive/2002/Jun/03/RSSAuto-DiscoveryPHP
   *
   * Parameters:
   * $html: raw HTML
   *
   * A discovery link is a <link> tag with an href, a rel containing
   * "alternate" and one of the types application/json+oembed,
   * text/xml+oembed or application/xml+oembed. The first link in the format
   * named by the setting format_preference wins; otherwise the last
   * discovery link of the other format is used.
   *
   * Returns: false on error or no link present or an OEmbed discovery link
   * (with HTML entities such as &amp; decoded, so it can be requested as is)
   */
  function getOEmbedLink($html){
      if(!$html) return false;

      // collect the attribute text of every <link> tag
      if(!preg_match_all('/<link\s+(.*?)\s*\/?>/si', $html, $tags)) return false;

      // link type => response format
      $formats = array(
          'application/json+oembed' => 'json',
          'text/xml+oembed' => 'xml',
          // application/xml+oembed only exists for compatibility, not in spec
          'application/xml+oembed' => 'xml',
      );
      $preferred = $this->getConf('format_preference');
      $fallback = false;

      foreach($tags[1] as $raw_attributes){
          // start from scratch for every tag so that attributes of an earlier
          // <link> can never leak into a later one
          $link = array();

          // name="value", name='value' or name=value; the branch reset group
          // (?|...) puts the value into group 2 for all three forms, so
          // quoted values may contain whitespace
          preg_match_all('/([^\s=\/"\'<>]+)\s*=\s*(?|"([^"]*)"|\'([^\']*)\'|([^\s"\'>]+))/s', $raw_attributes, $pairs, PREG_SET_ORDER);
          foreach($pairs as $pair){
              $link[strtolower($pair[1])] = html_entity_decode($pair[2], ENT_QUOTES, 'UTF-8');
          }

          if(empty($link['href']) || !isset($link['rel'], $link['type'])) continue;

          // rel is a whitespace separated token list
          $rels = preg_split('/\s+/', strtolower(trim($link['rel'])));
          if(!in_array('alternate', $rels, true)) continue;

          $type = strtolower(trim($link['type']));
          if(!isset($formats[$type])) continue;

          if($preferred == $formats[$type]) return $link['href'];
          $fallback = $link['href'];
      }
      return $fallback;
  }
  /*
   * Given a URL, finds a OEmbed provider API endpoint which can be used with
   * it from the local provider list.
   *
   * $url: URL to search a provider for
   *
   * The list is read from providers.xml in the plugin directory. Every
   * <scheme> of a <provider> is used as a case-insensitive regular
   * expression that must match the whole (trimmed) URL; the "api" attribute
   * of the first matching provider is the result.
   *
   * Returns: false on error or no provider found or the API endpoint of an
   * appropriate provider as a string
   */
  function getProviderAPI($url){
      $providers = simplexml_load_file(OEMBED_BASE.'providers.xml');
      if(!$providers) return false;

      $url = trim($url);
      foreach($providers->provider as $provider){
          // cast to a plain string so that no SimpleXMLElement leaves this
          // method; a provider without an api attribute can never match
          $attrs = $provider->attributes();
          $api = (string) $attrs['api'];
          if($api === '') continue;

          foreach($provider->scheme as $scheme){
              // "@" is the regex delimiter, so escape it inside the scheme
              $regex = "@^".str_replace("@", "\@", (string) $scheme)."$@i";
              if(preg_match($regex, $url)) return $api;
          }
      }
      return false;
  }
  /*
   * Runs htmlspecialchars() on values in OEmbed data EXCEPT for html values
   *
   * $oembed: OEmbed data from parseResponse() (array, or object with public
   * properties)
   *
   * Returns: array with the same keys as $oembed in which all values except
   * for html are strings run through htmlspecialchars(), with both quote
   * styles escaped. Values that are neither scalar nor null (nested arrays
   * or objects) are left out because they cannot be escaped as text.
   * Anything that is not an array or object yields an empty array.
   */
  function sanitizeOEmbed($oembed){
      $retarray = array();
      if(!is_array($oembed) && !is_object($oembed)) return $retarray;

      foreach($oembed as $key => $value){
          // strict comparison: with "==" an integer key 0 equals 'html' on
          // PHP versions before 8
          if($key === 'html'){
              $retarray[$key] = $value;
              continue;
          }
          if($value !== null && !is_scalar($value)) continue;

          $retarray[$key] = htmlspecialchars((string) $value, ENT_QUOTES, 'UTF-8');
      }
      return $retarray;
  }
  480. /***************************************************************************
  481. * DEBUG FUNCTIONS
  482. * For testing and development, not regularly used
  483. **************************************************************************/
  /*
   * Appends a timestamped line to oembed.log in the plugin directory
   *
   * $msg: text to log
   *
   * Logging is best-effort: when the log file cannot be opened the message
   * is dropped instead of passing an invalid handle to fwrite()/fclose().
   */
  function _log($msg){
      $fh = fopen(OEMBED_BASE."oembed.log",'a');
      if($fh === false) return;

      $curtime = date('Y-m-d H:i:s');
      fwrite($fh, "[{$curtime}] {$msg}\n");
      fclose($fh);
  }
  /*
   * Writes a parsed tag to the debug log
   *
   * $parsed_tag: parsed tag as returned by parseTag()
   *
   * Logs the URL, then the provider parameters, then the plugin parameters,
   * one line per entry.
   */
  function _logParsedTag($parsed_tag)
  {
      $this->_log("Parsed Tag");
      $this->_log(" URL: " . $parsed_tag['url']);

      // heading line => key of the parameter group listed under it
      $sections = array(
          " Provider Params:" => 'provider',
          " Plugin Params:" => 'plugin',
      );
      foreach ($sections as $heading => $group) {
          $this->_log($heading);
          foreach ($parsed_tag['params'][$group] as $name => $setting) {
              $this->_log(" " . $name . " => " . $setting);
          }
      }
  }
  /*
   * Writes resolved OEmbed data to the debug log
   *
   * $oembed: OEmbed data as returned by resolve()
   *
   * Logs the target URL, the query URL and then every field of the provider
   * response, one line per field.
   */
  function _logOEmbedData($oembed)
  {
      $lines = array(
          "OEmbed Data:",
          " target_url: " . $oembed['target_url'],
          " query_url: " . $oembed['query_url'],
          " Response:",
      );
      foreach ($lines as $line) {
          $this->_log($line);
      }

      foreach ($oembed['oembed'] as $field => $content) {
          $this->_log(" " . $field . ": " . $content);
      }
  }
  /*
   * Writes a list of error messages to the debug log
   *
   * $errors: array of error messages, e.g. $this->errors
   */
  function _logErrors($errors)
  {
      $this->_log("Errors:");
      foreach ($errors as $message) {
          $this->_log(" " . $message);
      }
  }
  517. }