demoable model

ucsd-cse-spis-2016 · Sep 1, 2016 · e2e0336 · e2e0336
1 parent 1a907f7
commit e2e0336
Showing 1 changed file with 55 additions and 42 deletions.
diff --git a/youtubetitles.py b/youtubetitles.py
@@ -196,6 +196,7 @@ def getAPIfromLink(link):
         for i in range(len(link)):
                 if link[i] == "=":
                         vidId = link[(i+1):len(link)]
+                        break
         APILink = "https://www.googleapis.com/youtube/v3/videos?part=snippet&id=" + vidId + "&key=AIzaSyDnYJlcS_O0hzFRVvMdR2CympAqFS4ClLU"
         return APILink
 
@@ -232,39 +233,10 @@ def text_to_wordlist(text):
     rs = rs.split()
     return rs
 
-def feature(datum, wordId):
-        feat = [0] * len(words)
-        r = text_to_wordlist(datum)
-        for w in r:
-                if w in words:
-                        feat[wordId[w]] += 1
-        feat.append(1)
-        feat.append(countCaps(datum))
-        feat.append(countPunct(datum))
-        return feat
-
-
-def feature2(datum, statsList):
-        feat = []
-
-        feat.append(len(datum))
-        feat.append(countCaps(datum))
-        feat.append(countPunct(datum))
-        feat.append(findCBRatio(datum, statsList))
-        return feat
-
-def feature2a(link):
-        feat = []
-        APIData = getAPIfromLink(link)       
-
-        feat.append(len(getTitleList(makeDict(getVids(APIData)))))
-        feat.append(countCaps(getTitleList(makeDict(getVids(APIData)))))
-        feat.append(countPunct(getTitleList(makeDict(getVids(APIData)))))
-        feat.append(findCBRatio2(link))
-        return feat
+
 
 print "Type 'begin()' to start the program"
-def begin()
+def begin():
         print "Please wait for 2 minutes while the data loads."
 
         cBResult = getVids('https://www.googleapis.com/youtube/v3/search?part=snippet,id&type=video&channelId=UCxJf49T4iTO_jtzWX3rW_jg&maxResults=50&key=AIzaSyDnYJlcS_O0hzFRVvMdR2CympAqFS4ClLU')
@@ -297,32 +269,73 @@ def begin()
         allWords.reverse()
         words = [w[1] for w in allWords[:700]]
         wordId = dict(zip(words, range(700)))
-
-
 
 
         print "Done"
 
-        def youtubeLink(link):
+        def feature(datum, wordId):  # word-count vector for one title, plus three extra slots
+                feat = [0] * len(words)
+                r = text_to_wordlist(datum)
+                for w in r:
+                        if w in wordId:  # hash lookup; wordId (built in begin()) is keyed by the entries of words
+                                feat[wordId[w]] += 1
+                feat.append(1)  # constant term
+                feat.append(countCaps(datum))
+                feat.append(countPunct(datum))
+                return feat
+
+
+        def feature2(datum, statsList):
+                # Four values per title: its length, countCaps, countPunct and findCBRatio.
+                feat = [
+                        len(datum),
+                        countCaps(datum),
+                        countPunct(datum),
+                        findCBRatio(datum, statsList)]
+                return feat
+
+        def feature2a(link):
+                # Same first three features as feature2, computed for the video behind `link`.
+                # Fetch and parse the API response once and reuse it for all three features.
+                title = getTitleList(makeDict(getVids(getAPIfromLink(link))))
+                feat = [len(title)]
+                feat.append(countCaps(title))
+                feat.append(countPunct(title))
+                feat.append(findCBRatio2(link))
+                return feat
+
+        def ownTitle(title):  # fit the word-count model, then print the class-1 probability for `title`
                 X = [feature(d, wordId) for d in titleList]
                 y = [1] * len(cBTitles) + [0] * len(normTitles)
                 logistic = LogisticRegression()
                 lr = logistic.fit(X, y)
-                return "stub"
 
-        def ownTitle(title):
+                titleFeat = feature(title, wordId)
+                prediction = lr.predict_proba([titleFeat])[0][1] * 100  # one-row 2-D input; [0][1] is row 0, class 1
+                print "The probability that your title is clickbait is: ", prediction, "%"
+                return ""
+        def youtubeVideo(link):  # fit the title-statistics model, then print the class-1 probability for `link`
                 X = [feature2(d, statsList) for d in titleList]
                 y = [1] * len(cBTitles) + [0] * len(normTitles)
                 logistic = LogisticRegression()
                 lr = logistic.fit(X, y)
 
-                titleFeat = feature(title, wordId)
-                return "stub"
-
-
-
+                titleFeat = feature2a(link)
+                prediction = lr.predict_proba([titleFeat])[0][1] * 100  # one-row 2-D input; [0][1] is row 0, class 1
+                print "The probability that this video is clickbait is: ", prediction, "%"
+                return ""
+        def prompts():
+                while True:  # re-prompt in a loop; self-recursion added a stack frame per round
+                        response = raw_input("Type 1 to type your own title or type 2 to enter a link: ")
+                        if response == "1":
+                                userInput = raw_input('Please type in your title: ')
+                                print ownTitle(userInput)
+                        elif response == "2":
+                                userInput = raw_input('Please type in the link: ')
+                                print youtubeVideo(userInput)
 
-
+        prompts()
+        return ""
 
 
 #how to get vid id