Added new script feed2podcast

author: Thorsten Ortlepp <post@ortlepp.eu> 2021-04-11 16:19:22 +0200
committer: Thorsten Ortlepp <post@ortlepp.eu> 2021-04-11 16:19:22 +0200
commit: 6e680ee71b37490f4c448de6d95f86f147037559 (patch)
tree: 8c8cdf5096aaa9e431e3c3fffdac8ccae290e39c
parent: ab7a3c3344fe4e993299a8b6bceecd4932921be5 (diff)
download: php-stuff-6e680ee71b37490f4c448de6d95f86f147037559.zip
2 files changed, 194 insertions, 0 deletions
diff --git a/README b/README
index 09ab543..b644639 100644
--- a/README
+++ b/README
@@ -2,6 +2,13 @@ PHP stuff
 =========
 
 
+feed2podcast
+------------
+A simple script to convert an RSS feed into a podcast. Designed for
+Deutschlandfunk Kalenderblatt, which is only available online and as
+an RSS feed but not as a podcast.
+
+
 rssfilter
 ---------
 A simple script to remove unwanted articles from an RSS feed, either
diff --git a/feed2podcast/feed2podcast.php b/feed2podcast/feed2podcast.php
new file mode 100644
index 0000000..0c16481
--- /dev/null
+++ b/feed2podcast/feed2podcast.php
@@ -0,0 +1,187 @@
+<?php
+// A script to convert an RSS feed into a podcast
+
+// Configuration: source feed URL and the two working directories (relative paths)
+$feed = 'https://www.deutschlandfunk.de/kalenderblatt.870.de.rss';
+$downloads = 'audio';
+$temp = 'temp';
+
+
+// Setup: make sure both working directories exist before anything is written
+foreach (array($downloads, $temp) as $dir) {
+ if (is_dir($dir)) {
+  continue;
+ }
+ mkdir($dir);
+}
+
+
+// Get the original feed first; answer 502 rather than emit a broken podcast feed
+$xml = simplexml_load_file($feed);
+if ($xml === false) { http_response_code(502); exit('Source feed unavailable'); }
+
+// Set proper MIME type and encoding
+header('Content-Type: application/rss+xml; charset=utf-8');
+
+
+// Write podcast feed intro; $esc XML-escapes dynamic text so '&', '<' or quotes cannot break the feed
+$esc = function ($value) { return htmlspecialchars((string) $value, ENT_XML1 | ENT_QUOTES, 'UTF-8'); };
+echo '<?xml version="1.0" encoding="utf-8"?>';
+echo '<rss xmlns:itunes="http://www.itunes.com/dtds/podcast-1.0.dtd" xmlns:atom="http://www.w3.org/2005/Atom" version="2.0">';
+echo '<channel>';
+echo '<title>'.$esc($xml->channel->title).'</title>';
+echo '<link>https://www.deutschlandfunk.de/kalenderblatt.870.de.html</link>';
+echo '<description>Das Kalenderblatt stellt historische Ereignisse von Bedeutung oder von Relevanz in anschaulicher Weise dar.</description>';
+echo '<category>'.$esc($xml->channel->category).'</category>';
+echo '<copyright>'.$esc($xml->channel->copyright).'</copyright>';
+echo '<language>'.$esc($xml->channel->language).'</language>';
+echo '<pubDate>'.$esc($xml->channel->pubDate).'</pubDate>';
+echo '<lastBuildDate>'.$esc($xml->channel->lastBuildDate).'</lastBuildDate>';
+echo '<ttl>'.$esc($xml->channel->ttl).'</ttl>';
+echo '<image>';
+echo ' <url>https://'.$esc($_SERVER['HTTP_HOST']).'/image.png</url>';
+echo ' <title>'.$esc($xml->channel->image->title).'</title>';
+echo ' <link>'.$esc($xml->channel->image->link).'</link>';
+echo ' <description>'.$esc($xml->channel->image->description).'</description>';
+echo '</image>';
+echo '<atom:link rel="self" type="application/rss+xml" href="https://'.$esc($_SERVER['HTTP_HOST'].$_SERVER['REQUEST_URI']).'" />';
+echo '<itunes:subtitle>Die Beiträge zur Sendung</itunes:subtitle>';
+echo '<itunes:image href="https://'.$esc($_SERVER['HTTP_HOST']).'/image.png"/>';
+echo '<itunes:new-feed-url>https://'.$esc($_SERVER['HTTP_HOST'].$_SERVER['REQUEST_URI']).'</itunes:new-feed-url>';
+echo '<itunes:owner> <itunes:name>Redaktion deutschlandradio.de</itunes:name>';
+echo ' <itunes:email>podcast@deutschlandradio.de</itunes:email>';
+echo '</itunes:owner>';
+echo '<itunes:author>Deutschlandfunk</itunes:author>';
+echo '<itunes:explicit>No</itunes:explicit>';
+echo '<itunes:category text="History" />';
+
+
+// Write podcast episodes. $esc XML-escapes dynamic text so that '&', '<' or quotes in
+// titles and descriptions cannot produce a malformed feed.
+$esc = function ($value) { return htmlspecialchars((string) $value, ENT_XML1 | ENT_QUOTES, 'UTF-8'); };
+$containing = array();
+foreach ($xml->channel->item as $item) {
+ $content = getPageContent($item->link);
+ if ($content[0] === 'XXX') {
+  continue; // no media file available on the episode page
+ }
+ $id = md5($item->guid);
+ $path = $downloads.'/'.$id.'.aac';
+
+ // Download episode if not yet done, starting from an emptied temp directory
+ if (!file_exists($path)) {
+  array_map('unlink', array_filter(glob($temp.'/*') ?: array(), 'is_file'));
+  downloadMediaFile($content[1], $id, $temp, $downloads);
+ }
+ if (!is_file($path)) {
+  continue; // download failed: skip rather than emit an enclosure without a length
+ }
+ $containing[] = $path;
+ $url = 'https://'.$esc($_SERVER['HTTP_HOST']).'/'.$path;
+
+ // Write episode to podcast feed
+ echo '<item>';
+ echo ' <title>'.$esc($item->title).'</title>';
+ echo ' <link>'.$url.'</link>';
+ echo ' <description>'.$esc($item->description).'</description>';
+ echo ' <pubDate>'.$esc($item->pubDate).'</pubDate>';
+ echo ' <guid>'.$esc($item->guid).'</guid>';
+ echo ' <enclosure url="'.$url.'" length="'.filesize($path).'" type="audio/aac"/>';
+ echo ' <itunes:author>'.$esc($content[0]).'</itunes:author>';
+ echo ' <itunes:duration>'.$esc($content[2]).'</itunes:duration>';
+ echo '</item>';
+}
+
+// Remove unused episode media files: every regular file in $downloads not listed in $containing
+foreach (glob($downloads.'/*') ?: array() as $file) {
+ if (is_file($file) && !in_array($file, $containing, true)) {
+  unlink($file);
+ }
+}
+
+
+// Write podcast feed end
+echo '</channel>';
+echo '</rss>';
+
+
+///// --- FUNCTIONS --- \\\\\
+
+
+// Get selected content from episode website. Returns array('XXX') when the page cannot
+// be fetched or has no player, else array(author name, media file url, duration m:ss).
+function getPageContent($site) {
+ $html = download($site);
+ if (!is_string($html) || $html === '') {
+  return array('XXX'); // loadHTML() cannot parse an empty document
+ }
+ $previous = libxml_use_internal_errors(true); // keep HTML parser warnings out of the feed
+ $dom = new DomDocument();
+ $dom->loadHTML($html);
+ libxml_clear_errors();
+ libxml_use_internal_errors($previous);
+ $xpath = new DOMXpath($dom);
+ $player = $xpath->query("//a[@class='player-embed']")->item(0);
+ if ($player === null) {
+  return array('XXX');
+ }
+ $authorname = preg_replace('/^Von /', '', trim($xpath->evaluate("string(//p[@class='author'])"))); // '' if no author
+ $total = intval($player->getAttribute('data-audio-duration'));
+ return array($authorname, $player->getAttribute('data-audio-src'), sprintf('%d:%02d', intdiv($total, 60), $total % 60));
+}
+
+
+// Download episode media file: resolve the nested playlist behind $url, fetch every
+// segment into $temp and let ffmpeg concatenate them to $downloads/$id.aac.
+function downloadMediaFile($url, $id, $temp, $downloads) {
+ $regex = "/([^\n\r]+)/m";
+
+ // Download first playlist and get "inner" playlist; give up if it lists no URL
+ preg_match_all($regex, download($url), $lines);
+ $playlists = array_values(array_filter($lines[1], "isUrl"));
+ if (count($playlists) === 0) {
+  return;
+ }
+
+ // Download all media file segments named in the "inner" playlist
+ preg_match_all($regex, download($playlists[0]), $urls);
+ $counter = 0;
+ foreach (array_filter($urls[1], "isUrl") as $segment) {
+  $outfile = fopen($temp.'/'.$counter.'.ts', 'wb') or exit('File open failed');
+  $curl = curl_init($segment);
+  curl_setopt($curl, CURLOPT_FILE, $outfile);
+  curl_setopt($curl, CURLOPT_HEADER, 0);
+  curl_exec($curl);
+  curl_close($curl);
+  fclose($outfile);
+  file_put_contents($temp.'/list.txt', 'file '.$counter.'.ts'."\n", FILE_APPEND);
+  $counter++;
+ }
+ if ($counter === 0) {
+  return; // no segments fetched, so there is nothing for ffmpeg to concatenate
+ }
+
+ // Concatenate segments to media file; paths are shell-escaped so spaces cannot split arguments
+ $ffmpeg = './ffmpeg -f concat -i '.escapeshellarg($temp.'/list.txt').' -c copy -bsf:a aac_adtstoasc '.escapeshellarg($downloads.'/'.$id.'.aac');
+ exec($ffmpeg);
+}
+
+
+// Fetch a URL with cURL, following redirects, and return
+// whatever curl_exec() yields for the transfer
+function download($url) {
+ $options = array(CURLOPT_URL => $url, CURLOPT_RETURNTRANSFER => true, CURLOPT_FOLLOWLOCATION => true);
+ $handle = curl_init();
+ curl_setopt_array($handle, $options);
+ $body = curl_exec($handle);
+ curl_close($handle);
+ return $body;
+}
+
+
+// Check if a string is an absolute http(s) URL; "http" merely occurring inside the line does not count
+function isUrl($var) {
+ return preg_match('~^\s*https?://~i', $var) === 1;
+}
+
+?>
author	Thorsten Ortlepp <post@ortlepp.eu>	2021-04-11 16:19:22 +0200
committer	Thorsten Ortlepp <post@ortlepp.eu>	2021-04-11 16:19:22 +0200
commit	6e680ee71b37490f4c448de6d95f86f147037559 (patch)
tree	8c8cdf5096aaa9e431e3c3fffdac8ccae290e39c
parent	ab7a3c3344fe4e993299a8b6bceecd4932921be5 (diff)
download	php-stuff-6e680ee71b37490f4c448de6d95f86f147037559.zip