(.*?) ?:.*?

(.*?)<\\/div>'

    // Matches the object literal assigned to `videoListJSON` on an episode page.
    // The capture runs from the opening brace to the first `]` that is directly
    // followed by `};` -- the closing brace itself is NOT captured, so callers
    // must append "}" before parsing.
    final JSON_REGEX = 'var videoListJSON = (\\{.*?](?=\\};))'

    /** @return the version number reported by this extractor (always 1) */
    int getVersion() {
        return 1
    }

    /**
     * @return the timeout for item extraction (always 30)
     */
    // NOTE(review): unit is presumably seconds -- confirm against the plugin API.
    int getExtractItemsTimeout() {
        return 30
    }

    /**
     * Reports an error and wraps it in a container so it can be shown to the user.
     *
     * The message is printed, passed to log(), and used as the title of a single
     * placeholder item (dummy url / thumbnailUrl) inside a container titled "Error".
     *
     * @param e the error message
     * @return a container holding exactly one placeholder item describing the error
     */
    WebResourceContainer errorHandlerWRC(String e) {
        println e
        log(e)
        List items = []
        items << new WebResourceItem(title: e, additionalInfo: ['url': 'http://error', 'thumbnailUrl': 'http://fake.jpg'])
        return new WebResourceContainer(title: "Error", items: items)
    }

    /**
     * Reports an error without producing a container: prints the message and
     * passes it to log().
     *
     * @param e the error message
     */
    void errorHandler(String e) {
        println e
        log(e)
    }

    /** @return the name of this extractor, 'discovery.com' */
    String getExtractorName() {
        return 'discovery.com'
    }

    /**
     * @param feedUrl the feed URL to test
     * @return true when the whole URL matches VALID_FEED_URL
     */
    boolean extractorMatches(URL feedUrl) {
        return feedUrl ==~ VALID_FEED_URL
    }

    /**
     * Builds the episode list for a show page.
     *
     * Steps: read the subdomain from the show URL, download the show page and
     * read the show title from it, download the episode list for that title,
     * then turn every SINGLE_EPISODE match into a WebResourceItem.
     *
     * @param resourceUrl         URL of the show page
     * @param maxItemsToRetrieve  maximum number of episodes to request; any
     *                            negative value is replaced by 100
     * @return a container titled with the show name. If the subdomain, the show
     *         title or the episodes cannot be found, an "Error" container from
     *         errorHandlerWRC is returned instead. If parsing fails part-way
     *         through, the failure is logged and the items parsed so far are
     *         returned.
     */
    WebResourceContainer extractItems(URL resourceUrl, int maxItemsToRetrieve) {
        List items = []

        // Get the subdomain
        def jsMatcher = resourceUrl =~ 'https?:\\/\\/(.*?)\\.discovery\\.com'
        if (jsMatcher.count <= 0) {
            return errorHandlerWRC("Discovery.com: Could not determine subdomain")
        }
        String strSubDomain = jsMatcher[0][1]

        // Get the show title
        String pageContent = resourceUrl.getText()
        jsMatcher = pageContent =~ SHOW_TITLE
        if (jsMatcher.count <= 0) {
            return errorHandlerWRC("Discovery.com: Show title not found")
        }
        String strShowName = unescapeHtml(jsMatcher[0][1])

        // Make sure there is a valid max number
        if (maxItemsToRetrieve <= -1) {
            maxItemsToRetrieve = 100
        }

        // Get the full episode list URL. The charset is given explicitly because
        // the one-argument encode() is deprecated and uses the platform default.
        URL urlEpisodeList = new URL(String.format(EPISODE_LIST, java.net.URLEncoder.encode(strShowName, 'UTF-8'), maxItemsToRetrieve))

        pageContent = urlEpisodeList.getText().replaceAll("\n", "")
        jsMatcher = pageContent =~ SINGLE_EPISODE

        // Matcher.count re-scans the whole page on every call, so read it once.
        int episodeCount = jsMatcher.count
        if (episodeCount <= 0) {
            return errorHandlerWRC("Discovery.com: No Episodes found")
        }

        try {
            // Prefix that replaces the leading "/" of each episode link.
            String episodeUrlPrefix = String.format(EPISODE_URL, strSubDomain)

            for (int i = 0; i < episodeCount; i++) {
                // Groups: 1 = episode link, 2 = thumbnail URL, 3 = title, 4 = release date
                def episode = jsMatcher[i]

                String strEpisodeURL = episode[1]
                strEpisodeURL = strEpisodeURL.replaceAll('^\\/', episodeUrlPrefix)

                Map additionalInfo = new HashMap()
                additionalInfo.put("url", strEpisodeURL)
                additionalInfo.put("thumbnailUrl", episode[2])

                Date releaseDate = Date.parse("MM/dd/yyyy", episode[4])

                // NOTE(review): in the source this pattern literal is split across a
                // line break (".*?: " + newline); it is written here with an explicit
                // \n. The original pattern was probably damaged -- confirm upstream.
                String title = unescapeHtml(episode[3]).replaceAll(".*?: \n", "")

                items << new WebResourceItem(title: title, releaseDate: releaseDate, additionalInfo: additionalInfo)
            }
        } catch (e) {
            // Keep whatever was parsed before the failure, but record the cause.
            errorHandler("Discovery.com: Error parsing episodes: " + e)
        }

        return new WebResourceContainer(title: strShowName, items: items)
    }

    /**
     * Resolves the playable URL for one episode.
     *
     * Downloads the episode page, parses the `videoListJSON` object embedded in
     * it, and picks an entry from the first clip's `mp4` list according to the
     * requested quality: HIGH = last entry, MEDIUM = middle entry (index rounded
     * from last/2), LOW = first entry.
     *
     * @param item              the episode; its additionalInfo must carry `url`
     *                          and `thumbnailUrl`
     * @param requestedQuality  the quality to select
     * @return a container for the selected URL. When the JSON cannot be found,
     *         the clip list is missing or empty, or the quality is none of
     *         HIGH / MEDIUM / LOW, contentUrl is null; cacheKey is null in the
     *         first two of these cases.
     */
    ContentURLContainer extractUrl(WebResourceItem item, PreferredQuality requestedQuality) {
        String url
        def cacheKey

        // Get episode page, flattened to a single line so the regex can span it
        String pageContent = new URL(item.additionalInfo.url).getText().replaceAll("\n", "").replaceAll("\r", "")

        // Get the chunk of json with the video info
        def jsMatcher = pageContent =~ JSON_REGEX
        def clips = null
        if (jsMatcher.count > 0) {
            // Text from " //" up to the next double quote is dropped before parsing
            // (presumably inline comments inside the script -- confirm), and the
            // closing brace excluded by JSON_REGEX is restored.
            def json = new JsonSlurper().parseText(jsMatcher[0][1].replaceAll(" \\/\\/.*?\"", "\"") + "}")
            clips = json.clips
        } else {
            errorHandler("Discovery.com: Video information not found")
        }

        // Get the first clip from the JSON
        // The rest are for other videos
        if (clips) {
            def firstClip = clips[0]

            // Index of the last mp4 entry, clamped to 0 when the list is empty
            int intMP4 = firstClip.mp4.size() - 1
            if (intMP4 < 0) {
                intMP4 = 0
            }

            // Get the URL for the requested quality
            if (requestedQuality == PreferredQuality.HIGH) {
                url = firstClip.mp4.src[intMP4]
            } else if (requestedQuality == PreferredQuality.MEDIUM) {
                url = firstClip.mp4.src[(int) Math.round((double) intMP4 / 2)]
            } else if (requestedQuality == PreferredQuality.LOW) {
                url = firstClip.mp4.src[0]
            }

            cacheKey = "Discovery_${firstClip.uuid}_${requestedQuality}"
        }

        return new ContentURLContainer(contentUrl: url, thumbnailUrl: item.additionalInfo.thumbnailUrl, expiresImmediately: true, cacheKey: cacheKey)
    }

    /**
     * Manual smoke test: lists up to 5 episodes of one show and prints each
     * item together with its HIGH quality URL container.
     */
    static void main(args) {
        Discovery extractor = new Discovery()

        //for testing
        //println extractor.extractorMatches(new URL("http://science.discovery.com/tv-shows/how-its-made"))
        //WebResourceContainer container = extractor.extractItems( new URL("http://animal.discovery.com/tv-shows/my-cat-from-hell"), 1)
        //WebResourceContainer container = extractor.extractItems( new URL("http://dsc.discovery.com/tv-shows/mythbusters"), 1)
        WebResourceContainer container = extractor.extractItems( new URL("http://science.discovery.com/tv-shows/how-its-made"), 5)

        if (container) {
            container.getItems().each {
                ContentURLContainer result = extractor.extractUrl(it, PreferredQuality.HIGH)
                println it
                println ""
                println result
                println ""
            }
        }
    }
}