\\s*(.*?)\\s*

'
                if(synopsisMatcher) {
                    synopsis = StringEscapeUtils.unescapeHtml(synopsisMatcher[0][1])
                }
                def releasedMatcher = li =~ '(?ms).*?First shown:\\s*(.*?)\\n.*?'
                if(releasedMatcher) {
                    String releaseDateString = releasedMatcher[0][1]
                    releaseDate = parseDate(releaseDateString)
                }
                totalItems = totalItems + 1
                Map additionalInfo = [:]
                additionalInfo.put('thumbnailUrl', icon)
                additionalInfo.put('videoUrl', url)
                items << new WebResourceItem(title: title, releaseDate: releaseDate, description: synopsis,additionalInfo: additionalInfo)
            }
        }
        return new WebResourceContainer(title: collectionTitle, thumbnailUrl: !items.empty ? items[0].additionalInfo['thumbnailUrl'] : null, items: items )
    }

    /**
     * Parses a release date string.
     * The value is first read as 'dd MMM yyyy'; if that fails it is retried as
     * "1 Jan <value>", which covers inputs that consist of a year only.
     *
     * @param date the date text to parse
     * @return the parsed Date
     * @throws ParseException when the value matches neither form
     */
    private def parseDate(String date) {
        // The fallback below hard-codes the English abbreviation "Jan", so the
        // month names are English; pin the locale instead of relying on the JVM default.
        SimpleDateFormat format = new SimpleDateFormat('dd MMM yyyy', Locale.ENGLISH)
        try {
            return format.parse(date)
        } catch (ParseException ignored) {
            // not a full date - retry as a bare year
            return format.parse("1 Jan $date")
        }
    }

    /**
     * Downloads the page at the given URL and returns the first "vpid" value found in it.
     *
     * @param videoUrl address of the page to download
     * @return the captured vpid value
     * @throws RuntimeException when the page contains no "vpid" entry
     */
    private def findStreamId(String videoUrl) {
        String html = new URL(videoUrl).getText()
        def streamIdMatcher = html =~ '(?s)"vpid":"(.+?)"'
        if(!streamIdMatcher) {
            throw new RuntimeException("Cannot find stream id")
        }
        return streamIdMatcher[0][1]
    }

    /**
     * Fetches the media selector descriptor for a stream id and builds the content URL
     * container for the media item matching the requested quality.
     *
     * @param streamId id appended to the media selector URL
     * @param requestedQuality quality used to choose among the bitrate-sorted media items
     * @param thumbnailUrl thumbnail passed through to the returned container
     * @return the container, or null when no video/audio item offers an
     *         'akamai_hls_open' connection
     */
    private ContentURLContainer parseStream(String streamId, PreferredQuality requestedQuality, String thumbnailUrl) {
        URL descriptorUrl = new URL("http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/iptv-all/vpid/" + streamId)
        def mediaSelectionNode = new XmlParser().parseText( descriptorUrl.getText() )

        // get all media items that are either video or audio and have a supported connection sub-element
        List mediaItems = mediaSelectionNode.media.findAll { it ->
            (it.'@kind' == 'video' || it.'@kind' == 'audio') && it.connection.any { item -> item.'@supplier' == 'akamai_hls_open' }
        }
        if(mediaItems.isEmpty()) {
            return null
        }

        // sort media items by bitrate, lowest first and get an item
        List sortedItems = mediaItems.sort { it.'@bitrate'.toInteger() }
        Node selectedMediaItem = findSuitableItem(sortedItems, requestedQuality)

        // find first suitable connection element; looked up through .connection so it
        // mirrors the filter above instead of iterating the media node implicitly
        Node connectionNode = selectedMediaItem.connection.find { it -> it.'@supplier' == 'akamai_hls_open' }
        if(connectionNode == null) {
            return null
        }

        String contentUrl = connectionNode.'@href'
        MediaFileType fileType = selectedMediaItem.'@kind'.startsWith('audio') ? MediaFileType.AUDIO : MediaFileType.VIDEO
        Date itemExpiryDate = getExpiresGMTDate(selectedMediaItem.'@expires')
        String cacheKey = streamId + '_' + selectedMediaItem.'@bitrate' + '_' + connectionNode.'@supplier'
        return new ContentURLContainer(fileType: fileType, contentUrl: contentUrl, thumbnailUrl: thumbnailUrl, expiresOn: itemExpiryDate, cacheKey: cacheKey, live: false)
    }

    /**
     * Parses an expiry timestamp, interpreting it in the GMT time zone.
     *
     * @param dateString timestamp text, may be null or empty
     * @return the parsed Date, or null when no timestamp was supplied
     * @throws ParseException when the text does not start with 'yyyy-MM-ddTHH:mm:ss'
     */
    private Date getExpiresGMTDate(String dateString) {
        // A media item without an 'expires' attribute yields null here; parsing null would throw.
        // NOTE(review): assumes ContentURLContainer accepts a null expiresOn - confirm.
        if(!dateString) {
            return null
        }
        // expiry is in format 2016-02-26T16:30:00Z , let's assume it's always GMT
        SimpleDateFormat df = new SimpleDateFormat('yyyy-MM-dd\'T\'HH:mm:ss')
        df.setTimeZone(TimeZone.getTimeZone("GMT"))
        return df.parse(dateString)
    }

    /**
     * Picks one item from a list ordered from worst to best quality.
     *
     * @param items candidate items, lowest quality first
     * @param requestedQuality LOW selects the first item, MEDIUM the middle one,
     *        anything else the last one
     * @return the selected item
     */
    private Node findSuitableItem(List items, PreferredQuality requestedQuality) {
        if(requestedQuality == PreferredQuality.LOW || items.size() <= 1) {
            // worst quality, get the first from the list
            return items.head()
        }
        if(requestedQuality == PreferredQuality.MEDIUM) {
            // get item from the middle; integer division keeps odd-sized lists on the
            // true middle element (size/2 in Groovy is a decimal and used to round up)
            int middle = items.size().intdiv(2)
            return items.get(middle)
        }
        // best quality, take the last url
        return items.last()
    }

    /**
     * Manual check of the extractor: asserts that the known URL shapes are matched,
     * then extracts a listing and resolves one of its items, printing both results.
     */
    static void main(args) {
        // this is just to test
        IPlayer extractor = new IPlayer()

        // categories
        //   http://www.bbc.co.uk/iplayer/categories/documentaries/highlights
        //   http://www.bbc.co.uk/iplayer/categories/documentaries-history/highlights
        // groups / collections
        //   http://www.bbc.co.uk/iplayer/group/b06z98k4
        //   http://www.bbc.co.uk/iplayer/group/p02j4np8
        // episodes (id comes from the programme URL)
        //   http://www.bbc.co.uk/iplayer/episodes/b006m86d
        assert extractor.extractorMatches( new URL("http://www.bbc.co.uk/iplayer/categories/documentaries/highlights") )
        assert extractor.extractorMatches( new URL("http://www.bbc.co.uk/iplayer/categories/documentaries-history/highlights") )
        assert extractor.extractorMatches( new URL("http://www.bbc.co.uk/iplayer/group/b06z98k4") )
        assert extractor.extractorMatches( new URL("http://www.bbc.co.uk/iplayer/group/p02j4np8") )
        assert extractor.extractorMatches( new URL("http://www.bbc.co.uk/iplayer/episodes/b006m86d") )

        //WebResourceContainer container = extractor.extractItems( new URL("http://www.bbc.co.uk/iplayer/group/b03m36wd"), -1)
        // WebResourceContainer container = extractor.extractItems( new URL("http://www.bbc.co.uk/iplayer/episodes/b006m86d"), -1)
        //WebResourceContainer container = extractor.extractItems( new URL("http://www.bbc.co.uk/iplayer/categories/documentaries-history/highlights"), -1)
        WebResourceContainer container = extractor.extractItems( new URL("http://www.bbc.co.uk/iplayer/categories/music/all?sort=atoz"), -1)
        //WebResourceContainer container = extractor.extractItems( new URL("http://www.bbc.co.uk/iplayer/categories/news/all?sort=atoz"), 3)
        println container

        ContentURLContainer result = extractor.extractUrl(container.getItems()[2], PreferredQuality.MEDIUM)
        println result
    }
}