qa_model

[code=python]
import os
import sys
import time

import numpy

import shelve

import theano
import theano.tensor as T
from theano.tensor.shared_randomstreams import RandomStreams

class dA(object):
    """Denoising Auto-Encoder class (dA)

    A denoising autoencoder tries to reconstruct the input from a corrupted
    version of it by first projecting it into a latent space and then
    reprojecting it back into the input space. Please refer to Vincent et
    al., 2008 for more details. If x is the input, then equation (1) computes
    a partially destroyed version of x by means of a stochastic mapping q_D.
    Equation (2) computes the projection of the input into the latent space.
    Equation (3) computes the reconstruction of the input, while equation (4)
    computes the reconstruction error.

    .. math::

        \tilde{x} \sim q_D(\tilde{x}|x)                                  (1)

        y = s(W \tilde{x} + b)                                           (2)

        z = s(W' y + b')                                                 (3)

        L(x, z) = -\sum_{k=1}^d [x_k \log z_k + (1 - x_k) \log(1 - z_k)] (4)

    """

    def __init__(
        self,
        numpy_rng,
        theano_rng=None,
        input=None,
        #n_visible=784,
        n_hidden=100,
        W=None,
        bhid=None,
        #bvis=None
    ):
 49         """
 50         Initialize the dA class by specifying the number of visible units (the
 51         dimension d of the input ), the number of hidden units ( the dimension
 52         d' of the latent or hidden space ) and the corruption level. The
 53         constructor also receives symbolic variables for the input, weights and
 54         bias. Such a symbolic variables are useful when, for example the input
 55         is the result of some computations, or when weights are shared between
 56         the dA and an MLP layer. When dealing with SdAs this always happens,
 57         the dA on layer 2 gets as input the output of the dA on layer 1,
 58         and the weights of the dA are used in the second stage of training
 59         to construct an MLP.
 60 
 61         :type numpy_rng: numpy.random.RandomState
 62         :param numpy_rng: number random generator used to generate weights
 63 
 64         :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams
 65         :param theano_rng: Theano random generator; if None is given one is
 66                      generated based on a seed drawn from `rng`
 67 
 68         :type input: theano.tensor.TensorType
 69         :param input: a symbolic description of the input or None for
 70                       standalone dA
 71 
 72         :type n_hidden: int
 73         :param n_hidden:  number of hidden units
 74 
 75         :type W: theano.tensor.TensorType
 76         :param W: Theano variable pointing to a set of weights that should be
 77                   shared belong the dA and another architecture; if dA should
 78                   be standalone set this to None
 79 
 80         :type bhid: theano.tensor.TensorType
 81         :param bhid: Theano variable pointing to a set of biases values (for
 82                      hidden units) that should be shared belong dA and another
 83                      architecture; if dA should be standalone set this to None
 84 
 85         
 86 
 87         """
        #self.n_visible = n_visible
        self.n_hidden = n_hidden

        # create a Theano random generator that gives symbolic random values
        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))

        # note : W' was written as `W_prime` and b' as `b_prime`
        if not W:
            # W is initialized with `initial_W`, which is uniformly sampled
            # between -4*sqrt(6./(n_hidden+n_hidden)) and
            # 4*sqrt(6./(n_hidden+n_hidden)); the output of uniform is
            # converted using asarray to dtype theano.config.floatX so that
            # the code is runnable on GPU
            initial_W = numpy.asarray(
                numpy_rng.uniform(
                    low=-4 * numpy.sqrt(6. / (n_hidden + n_hidden)),
                    high=4 * numpy.sqrt(6. / (n_hidden + n_hidden)),
                    size=(n_hidden, n_hidden)
                ),
                dtype=theano.config.floatX
            )
            W = theano.shared(value=initial_W, name='W', borrow=True)

        '''
        if not bvis:
            bvis = theano.shared(
                value=numpy.zeros(
                    n_visible,
                    dtype=theano.config.floatX
                ),
                borrow=True
            )
        '''
        if not bhid:
            bhid = theano.shared(
                value=numpy.zeros(
                    n_hidden,
                    dtype=theano.config.floatX
                ),
                name='b',
                borrow=True
            )

        self.W = W
        # b corresponds to the bias of the hidden units
        self.b = bhid
        # b_prime corresponds to the bias of the visible units
        #self.b_prime = bvis
        # tied weights, therefore W_prime is W transpose
        #self.W_prime = self.W.T
        self.theano_rng = theano_rng
        # if no input is given, generate a variable representing the input
        if input is None:
            # we use a matrix because we expect a minibatch of several
            # examples, each example being a row
            self.x = T.dmatrix(name='input')
        else:
            self.x = input

        self.params = [self.W, self.b]
    # end-snippet-1
    def get_hidden_values(self):
        """ Computes the values of the hidden layer """
        return T.sum(T.nnet.sigmoid(T.dot(self.x, self.W) + self.b), axis=0)
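    # Shape note: `self.x` holds one example per row and `W` is
    # (n_hidden, n_hidden), so the dot product yields one row per example;
    # summing over axis 0 pools the whole minibatch into a single vector of
    # length n_hidden. The script below compares this pooled code between
    # question and answer encoders.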

    '''
    def get_corrupted_input(self, input, corruption_level):
        """This function keeps ``1-corruption_level`` entries of the inputs the
        same and zeroes out a randomly selected subset of size
        ``corruption_level``.
        Note : the first argument of theano.rng.binomial is the shape (size) of
               the random numbers it should produce;
               the second argument is the number of trials;
               the third argument is the probability of success of any trial.

                This will produce an array of 0s and 1s where 1 has a
                probability of 1 - ``corruption_level`` and 0 has a
                probability of ``corruption_level``.

                The binomial function returns the int64 data type by
                default.  int64 multiplied by the input type (floatX)
                always returns float64.  To keep all data in floatX when
                floatX is float32, we set the dtype of the binomial to
                floatX. As in our case the value of the binomial is always
                0 or 1, this doesn't change the result. This is needed to
                allow the GPU to work correctly, as it only supports
                float32 for now.

        """
        return self.theano_rng.binomial(size=input.shape, n=1,
                                        p=1 - corruption_level,
                                        dtype=theano.config.floatX) * input
    '''
    '''
    def get_reconstructed_input(self, hidden):
        """Computes the reconstructed input given the values of the
        hidden layer

        """
        return T.nnet.sigmoid(T.dot(hidden, self.W_prime) + self.b_prime)

    def get_cost_updates(self, corruption_level, learning_rate):
        """ This function computes the cost and the updates for one training
        step of the dA """

        #tilde_x = self.get_corrupted_input(self.x, corruption_level)
        y = self.get_hidden_values(tilde_x)
        #z = self.get_reconstructed_input(y)
        # note : we sum over the size of a datapoint; if we are using
        #        minibatches, L will be a vector, with one entry per
        #        example in minibatch
        L = - T.sum(self.x * T.log(z) + (1 - self.x) * T.log(1 - z), axis=1)
        # note : L is now a vector, where each element is the
        #        cross-entropy cost of the reconstruction of the
        #        corresponding example of the minibatch. We need to
        #        compute the average of all these to get the cost of
        #        the minibatch
        cost = T.mean(L)

        # compute the gradients of the cost of the `dA` with respect
        # to its parameters
        gparams = T.grad(cost, self.params)
        # generate the list of updates
        updates = [
            (param, param - learning_rate * gparam)
            for param, gparam in zip(self.params, gparams)
        ]

        return (cost, updates)
    '''


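# ---------------------------------------------------------------------------
# Script section: build n_hidden question encoders (da_q) and n_hidden answer
# encoders (da_a) over shared symbolic inputs, then train them jointly so
# that each question encoder's pooled hidden code matches its paired answer
# encoder's code under a squared-error cost.
# ---------------------------------------------------------------------------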
x = T.fmatrix('x')  # question matrix
y = T.fmatrix('y')  # answer matrix
index = T.lscalar()
rng = numpy.random.RandomState(23455)
theano_rng = RandomStreams(rng.randint(2 ** 30))
n_hidden = 2
learning_rate = 0.1
da_q = []
da_a = []
for count in range(n_hidden):
    da_q.append(dA(
        numpy_rng=rng,
        theano_rng=theano_rng,
        input=x,
        #n_visible=28 * 28,
        n_hidden=100
    ))

for count in range(n_hidden):
    da_a.append(dA(
        numpy_rng=rng,
        theano_rng=theano_rng,
        input=y,
        #n_visible=28 * 28,
        n_hidden=100
    ))
cost_matrix = []
for hid_index in range(n_hidden):
    # half squared Euclidean distance between the pooled hidden codes of the
    # hid_index-th question encoder and answer encoder
    cost_matrix.append(T.sum(
        T.sqr(da_q[hid_index].get_hidden_values()
              - da_a[hid_index].get_hidden_values()) / 2
    ))
cost = T.sum(cost_matrix)
# collect the parameters of every question/answer encoder pair
params = da_q[0].params + da_a[0].params
for hid_index in range(1, n_hidden):
    params += da_q[hid_index].params + da_a[hid_index].params
gparams = T.grad(cost, params)
updates = []
for param, gparam in zip(params, gparams):
    updates.append((param, param - learning_rate * gparam))
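# each (param, param - learning_rate * gparam) pair applies one step of plain
# gradient descent: param <- param - learning_rate * d(cost)/d(param)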
db = shelve.open(r'data\training_data\training_data_30_50_1_9_games.dat')
x1 = db['train_set1']
q, a = x1[0]
q1, a1 = x1[1]
train_da = theano.function(
    [index],
    cost,
    updates=updates,
    givens={
        x: x1[0][0],
        y: x1[0][1]
    },
    on_unused_input='ignore'  # `index` does not appear in the graph
)
print(train_da(0))
[/code]
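The compiled `train_da` above is bound to just the first (question, answer) pair through `givens`. As a minimal sketch of training over the whole dataset, one could instead compile the trainer with the data matrices as explicit inputs. This assumes, though the original post does not confirm it, that every element of `x1` is a (question matrix, answer matrix) pair of float32 arrays whose shapes match the symbolic graph:

[code=python]
# Sketch under the stated assumptions: reuse the symbolic `cost` and
# `updates` built above, feeding each (question, answer) pair as inputs.
train_pair = theano.function([x, y], cost, updates=updates)

n_epochs = 10  # hypothetical setting
for epoch in range(n_epochs):
    epoch_cost = 0.
    for q_mat, a_mat in x1:  # assumed (question, answer) pairs
        epoch_cost += train_pair(q_mat, a_mat)
    print('epoch %d, total cost %f' % (epoch, epoch_cost))
[/code]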
Original article: https://www.cnblogs.com/leon-code/p/4503375.html