Ansichten: QuotePaste - CodePaste - NoPaste
Codesnippet eingetragen am 11.1.2016 um 09:04
Von: CaptchaSolver
Sprache: Python
Beschreibung: Takes a wave file, splits it into segments and identifies the pieces. Convert to WAV first if needed.
CodeSnippet:
  1. context@S314-V7R2-MINT ~/TMP/webapp/captcha $ cat splitter2.py
  2. #!/usr/bin/env python
  3.  
  4. import wave
  5. import sys
  6. import struct
  7. import os
  8. import time
  9. import httplib
  10. import urllib2
  11. import urllib
  12. import hashlib
  13. from random import randint
  14.  
  15. url="http://10.32.128.165"
  16. cookies = ""
  17. local_filename = "analyze.wav"
  18. counter = 5
  19. ############################## Send URL with Data ###########################################################
  20. def sendResponse(cookies,count,param):
  21. request = urllib2.Request(url+param)
  22. request.add_header("Cookie", cookies)
  23. opener=urllib2.build_opener(urllib2.HTTPHandler(debuglevel=0))
  24. sock=urllib2.urlopen(request)
  25. content=sock.read()
  26. sock.close()
  27. # print content
  28. if count > 4:
  29. request_count = content[content.find('name="captcha" /><br><br>')+len('name="captcha" /><br><br>'):content.find(' <input type="submit"')]
  30. remote_file = content[content.find('<source src="')+len('<source src="'):content.find('" type="')]
  31. urllib.urlretrieve ("http://10.32.128.165/"+remote_file, local_filename)
  32. print "Reached "+str(request_count)
  33. if int(request_count) > 990:
  34. print content
  35.  
  36.  
  37. ############################## Get to the File ##############################################################
  38.  
  39. request = urllib2.Request(url)
  40. sock=urllib2.urlopen(request)
  41. cookies=sock.info()['Set-Cookie']
  42. content=sock.read()
  43. sock.close()
  44. print "First Cookie: " +str(cookies)+" "+str(content)
  45.  
  46. # build for loop here, at a later time :-)
  47. sendResponse(cookies,1,"/index.php")
  48. sendResponse(cookies,2,"/index.php")
  49. sendResponse(cookies,3,"/index.php")
  50. sendResponse(cookies,4,"/index.php")
  51. sendResponse(cookies,5,"/index.php")
  52.  
  53. print cookies
  54.  
  55. while True:
  56. ip = wave.open(local_filename, 'r')
  57. info = ip.getparams()
  58. frame_list = []
  59. for i in range(ip.getnframes()):
  60. sframe = ip.readframes(1)
  61. amplitude = struct.unpack('<h', sframe)[0]
  62. frame_list.append(amplitude)
  63. ip.close()
  64. for i in range(0,len(frame_list)):
  65. if abs(frame_list[i]) < 25:
  66. frame_list[i] = 0
  67. ################################ Find Out most louder portions of the audio file ###########################
  68. thresh = 30
  69. output = []
  70. nonzerotemp = []
  71. length = len(frame_list)
  72. i = 0
  73. while i < length:
  74. zeros = []
  75. while i < length and frame_list[i] == 0:
  76. i += 1
  77. zeros.append(0)
  78. if len(zeros) != 0 and len(zeros) < thresh:
  79. nonzerotemp += zeros
  80. elif len(zeros) > thresh:
  81. if len(nonzerotemp) > 0 and i < length:
  82. output.append(nonzerotemp)
  83. nonzerotemp = []
  84. else:
  85. nonzerotemp.append(frame_list[i])
  86. i += 1
  87. if len(nonzerotemp) > 0:
  88. output.append(nonzerotemp)
  89.  
  90. chunks = []
  91. for j in range(0,len(output)):
  92. if len(output[j]) > 3000:
  93. chunks.append(output[j])
  94.  
  95. # Below code generates separate wav files depending on the number of loud voice detected.
  96.  
  97. NEW_RATE = 1 #Change it to > 1 if any amplification is required
  98. # Sometimes we've got two hashes for one letter, this seems to happen when the latter appears in the first position
  99. md5_table=[
  100. ['65f0e942b400045c62449a8037712d9a', '0'], # Works
  101. ['eb0adb153f6f1682a68fcac393afdd67', '0'], # Works
  102. ['c38fa643069536a88c15cc98906a0198', '1'], # Works
  103. ['6c9bdd133ce111bb6525a2ec3b209225', '1'], # Works
  104. ['28f077539a19c25abbea0061dd9d3780', '2'], # Works
  105. ['ef263ce2d0cb3522ec2a6460c0285bc0', '3'], # Works
  106. ['2b3144262303dfd62e6a416b2aeec8bf', '3'], # Works
  107. ['721e764bae5fa444dca01d6e38d7e037', '4'], # Works
  108. ['0b960d29b94b799ac73b31f4d1be08f8', '5'], # Works
  109. ['5e4e273bcae50c86a071878da9c583ba', '6'], # Works
  110. ['b07a747c7ecc1b395ae77d77e8228207', '6'], # Works
  111. ['e44c480cf312dc4acece3dc8ed76482d', '7'], # Works
  112. ['0354512ef293a02ceef32761beb6fc0c', '7'], # Works
  113. ['aea1ef6d6183296f9f868807bab50880', '8'], # Works
  114. ['c5bafac98c1a545ed10906bca5e1c5df', '9'], # Works
  115. ['40e408300460e7070e9f9dbd779b9a50', 'a'], # Works
  116. ['14814360d031a13c30afb1b2132f6cf9', 'b'], # Works
  117. ['126031c2cf4876e143e3decb21c60b87', 'c'], # Works
  118. ['6c8f6c1f4fd47c3bf1b61a9b4696d318', 'c'], # Works
  119. ['72b3d10ae1ffe3948c110f2b7550ba49', 'd'], # Works
  120. ['a264998d3fa7e93900df053904e16522', 'e'], # Works
  121. ['2df461ea1d2d20accf0af96e63002619', 'f'], # Works
  122. ['348cc096c0a6c4969a18b299477a06f4', 'f'], # Works
  123. ['', 'g'],
  124. ['', 'h'],
  125. ['', 'i'],
  126. ['', 'j'],
  127. ['', 'k'],
  128. ['', 'l'],
  129. ['', 'm'],
  130. ['', 'n'],
  131. ['', 'o'],
  132. ['', 'p'],
  133. ['', 'q'],
  134. ['', 'r'],
  135. ['', 's'],
  136. ['', 't'],
  137. ['', 'u'],
  138. ['', 'v'],
  139. ['', 'w'],
  140. ['', 'x'],
  141. ['', 'y'],
  142. ['', 'z'],
  143. ]
  144. print '[+] Possibly ',len(chunks),'number of loud voice detected...'
  145. sequence = ''
  146. errorflag = 0
  147. for i in range(0, len(chunks)):
  148. new_frame_rate = info[0]*NEW_RATE
  149. # print '[+] Creating No. ',str(i),'file..'
  150. split = wave.open('cut_'+str(i)+'.wav', 'w')
  151. split.setparams((info[0],info[1],info[2],0,info[4],info[5]))
  152.  
  153. # Add the voice for the first time
  154. for frames in chunks[i]:
  155. single_frame = struct.pack('<h', frames)
  156. split.writeframes(single_frame)
  157.  
  158. split.close()#Close each files
  159.  
  160. # Now that we have single wav files look them up from a map
  161. tmp_md5 = hashlib.md5(open('cut_'+str(i)+'.wav', 'rb').read()).hexdigest()
  162. try:
  163. letter = [y[0] for y in md5_table].index(tmp_md5)
  164. # print "Identified Letter " + str(md5_table[letter][1])
  165. sequence+=str(md5_table[letter][1])
  166. except ValueError:
  167. print "Unknown sample: " + "cut_"+str(i)+".wav"
  168. sequence+="?"
  169. errorflag = 1
  170. print "identified sequence: " + str(sequence)
  171. if errorflag == 0:
  172. counter += 1
  173. sendResponse(cookies,counter,"/index.php?captcha="+str(sequence))
  174. else:
  175. print "Please teach me"
  176. sys.exit(0)
  177.  
  178.